Discussion:
[Mesa-dev] [PATCH v2 1/8] amd/common:add uvd hevc enc support check in hw query
James Zhu
2018-02-06 20:05:38 UTC
Permalink
Use the amdgpu hardware query information to check whether UVD HEVC encode is supported.

Signed-off-by: James Zhu <***@amd.com>
---
src/amd/common/ac_gpu_info.c | 10 +++++++++-
src/amd/common/ac_gpu_info.h | 1 +
2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 6d9dcb5..2494967 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -98,7 +98,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
{
struct amdgpu_buffer_size_alignments alignment_info = {};
struct amdgpu_heap_info vram, vram_vis, gtt;
- struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, vce = {}, vcn_dec = {}, vcn_enc = {};
+ struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, uvd_enc = {}, vce = {}, vcn_dec = {}, vcn_enc = {};
uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0;
int r, i, j;
drmDevicePtr devinfo;
@@ -166,6 +166,12 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
return false;
}

+ r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_UVD_ENC, 0, &uvd_enc);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd_enc) failed.\n");
+ return false;
+ }
+
if (info->drm_major == 3 && info->drm_minor >= 17) {
r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_DEC, 0, &vcn_dec);
if (r) {
@@ -275,6 +281,8 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
uvd.available_rings ? uvd_version : 0;
info->vce_fw_version =
vce.available_rings ? vce_version : 0;
+ info->uvd_enc_supported =
+ uvd_enc.available_rings ? true : false;
info->has_userptr = true;
info->has_syncobj = has_syncobj(fd);
info->has_syncobj_wait_for_submit = info->has_syncobj && info->drm_minor >= 20;
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index cca3e98..36714ee 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -65,6 +65,7 @@ struct radeon_info {
uint32_t num_compute_rings;
uint32_t uvd_fw_version;
uint32_t vce_fw_version;
+ bool uvd_enc_supported;
uint32_t me_fw_version;
uint32_t me_fw_feature;
uint32_t pfp_fw_version;
--
2.7.4
James Zhu
2018-02-06 20:05:40 UTC
Permalink
Add hevc encode hardware interface for UVD

Signed-off-by: James Zhu <***@amd.com>
---
src/gallium/drivers/radeon/radeon_uvd_enc.h | 471 ++++++++++++++++++++++++++++
1 file changed, 471 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.h

diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.h b/src/gallium/drivers/radeon/radeon_uvd_enc.h
new file mode 100644
index 0000000..1cca0d2
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc.h
@@ -0,0 +1,471 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef _RADEON_UVD_ENC_H
+#define _RADEON_UVD_ENC_H
+
+#define RENC_UVD_FW_INTERFACE_MAJOR_VERSION 1
+#define RENC_UVD_FW_INTERFACE_MINOR_VERSION 1
+
+#define RENC_UVD_IB_PARAM_SESSION_INFO 0x00000001
+#define RENC_UVD_IB_PARAM_TASK_INFO 0x00000002
+#define RENC_UVD_IB_PARAM_SESSION_INIT 0x00000003
+#define RENC_UVD_IB_PARAM_LAYER_CONTROL 0x00000004
+#define RENC_UVD_IB_PARAM_LAYER_SELECT 0x00000005
+#define RENC_UVD_IB_PARAM_SLICE_CONTROL 0x00000006
+#define RENC_UVD_IB_PARAM_SPEC_MISC 0x00000007
+#define RENC_UVD_IB_PARAM_RATE_CONTROL_SESSION_INIT 0x00000008
+#define RENC_UVD_IB_PARAM_RATE_CONTROL_LAYER_INIT 0x00000009
+#define RENC_UVD_IB_PARAM_RATE_CONTROL_PER_PICTURE 0x0000000a
+#define RENC_UVD_IB_PARAM_SLICE_HEADER 0x0000000b
+#define RENC_UVD_IB_PARAM_ENCODE_PARAMS 0x0000000c
+#define RENC_UVD_IB_PARAM_QUALITY_PARAMS 0x0000000d
+#define RENC_UVD_IB_PARAM_DEBLOCKING_FILTER 0x0000000e
+#define RENC_UVD_IB_PARAM_INTRA_REFRESH 0x0000000f
+#define RENC_UVD_IB_PARAM_ENCODE_CONTEXT_BUFFER 0x00000010
+#define RENC_UVD_IB_PARAM_VIDEO_BITSTREAM_BUFFER 0x00000011
+#define RENC_UVD_IB_PARAM_FEEDBACK_BUFFER 0x00000012
+#define RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER 0x00000013
+#define RENC_UVD_IB_PARAM_FEEDBACK_BUFFER_ADDITIONAL 0x00000014
+
+#define RENC_UVD_IB_OP_INITIALIZE 0x08000001
+#define RENC_UVD_IB_OP_CLOSE_SESSION 0x08000002
+#define RENC_UVD_IB_OP_ENCODE 0x08000003
+#define RENC_UVD_IB_OP_INIT_RC 0x08000004
+#define RENC_UVD_IB_OP_INIT_RC_VBV_BUFFER_LEVEL 0x08000005
+#define RENC_UVD_IB_OP_SET_SPEED_ENCODING_MODE 0x08000006
+#define RENC_UVD_IB_OP_SET_BALANCE_ENCODING_MODE 0x08000007
+#define RENC_UVD_IB_OP_SET_QUALITY_ENCODING_MODE 0x08000008
+
+#define RENC_UVD_IF_MAJOR_VERSION_MASK 0xFFFF0000
+#define RENC_UVD_IF_MAJOR_VERSION_SHIFT 16
+#define RENC_UVD_IF_MINOR_VERSION_MASK 0x0000FFFF
+#define RENC_UVD_IF_MINOR_VERSION_SHIFT 0
+
+#define RENC_UVD_PREENCODE_MODE_NONE 0x00000000
+#define RENC_UVD_PREENCODE_MODE_1X 0x00000001
+#define RENC_UVD_PREENCODE_MODE_2X 0x00000002
+#define RENC_UVD_PREENCODE_MODE_4X 0x00000004
+
+#define RENC_UVD_SLICE_CONTROL_MODE_FIXED_CTBS 0x00000000
+#define RENC_UVD_SLICE_CONTROL_MODE_FIXED_BITS 0x00000001
+
+#define RENC_UVD_RATE_CONTROL_METHOD_NONE 0x00000000
+#define RENC_UVD_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR 0x00000001
+#define RENC_UVD_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR 0x00000002
+#define RENC_UVD_RATE_CONTROL_METHOD_CBR 0x00000003
+
+#define RENC_UVD_NALU_TYPE_AUD 0x00000001
+#define RENC_UVD_NALU_TYPE_VPS 0x00000002
+#define RENC_UVD_NALU_TYPE_SPS 0x00000003
+#define RENC_UVD_NALU_TYPE_PPS 0x00000004
+#define RENC_UVD_NALU_TYPE_END_OF_SEQUENCE 0x00000005
+
+#define RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS 16
+#define RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS 16
+
+#define RENC_UVD_HEADER_INSTRUCTION_END 0
+#define RENC_UVD_HEADER_INSTRUCTION_DEPENDENT_SLICE_END 1
+#define RENC_UVD_HEADER_INSTRUCTION_COPY 2
+#define RENC_UVD_HEADER_INSTRUCTION_FIRST_SLICE 3
+#define RENC_UVD_HEADER_INSTRUCTION_SLICE_SEGMENT 4
+#define RENC_UVD_HEADER_INSTRUCTION_SLICE_QP_DELTA 5
+
+#define RENC_UVD_PICTURE_TYPE_B 0
+#define RENC_UVD_PICTURE_TYPE_P 1
+#define RENC_UVD_PICTURE_TYPE_I 2
+#define RENC_UVD_PICTURE_TYPE_P_SKIP 3
+
+#define RENC_UVD_SWIZZLE_MODE_LINEAR 0
+#define RENC_UVD_SWIZZLE_MODE_256B_D 2
+#define RENC_UVD_SWIZZLE_MODE_4kB_D 6
+#define RENC_UVD_SWIZZLE_MODE_64kB_D 10
+#define RENC_UVD_INTRA_REFRESH_MODE_NONE 0
+#define RENC_UVD_INTRA_REFRESH_MODE_CTB_MB_ROWS 1
+#define RENC_UVD_INTRA_REFRESH_MODE_CTB_MB_COLUMNS 2
+
+#define RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES 34
+#define RENC_UVD_ADDR_MODE_LINEAR 0
+#define RENC_UVD_ADDR_MODE_PELE_8X8_1D 1
+#define RENC_UVD_ADDR_MODE_32AS8_88 2
+
+#define RENC_UVD_ARRAY_MODE_LINEAR 0
+#define RENC_UVD_ARRAY_MODE_PELE_8X8_1D 2
+#define RENC_UVD_ARRAY_MODE_2D_TILED_THIN1 4
+
+#define RENC_UVD_VIDEO_BITSTREAM_BUFFER_MODE_LINEAR 0
+#define RENC_UVD_VIDEO_BITSTREAM_BUFFER_MODE_CIRCULAR 1
+
+#define RENC_UVD_FEEDBACK_BUFFER_MODE_LINEAR 0
+#define RENC_UVD_FEEDBACK_BUFFER_MODE_CIRCULAR 1
+
+#define RENC_UVD_FEEDBACK_STATUS_OK 0x00000000
+#define RENC_UVD_FEEDBACK_STATUS_NOT_ENCODED 0x10000001
+
+typedef struct radeon_uvd_enc_feedback_s
+{
+ uint32_t task_id;
+ uint32_t first_in_task;
+ uint32_t last_in_task;
+ uint32_t status;
+ uint32_t has_bitstream;
+ uint32_t bitstream_offset;
+ uint32_t bitstream_size;
+ uint32_t enabled_filler_data;
+ uint32_t filler_data_size;
+ uint32_t extra_bytes;
+} radeon_uvd_enc_feedback_t;
+
+typedef struct ruvd_enc_session_info_s
+{
+ uint32_t reserved;
+ uint32_t interface_version;
+ uint32_t sw_context_address_hi;
+ uint32_t sw_context_address_lo;
+} ruvd_enc_session_info_t;
+
+typedef struct ruvd_enc_task_info_s
+{
+ uint32_t total_size_of_all_packages;
+ uint32_t task_id;
+ uint32_t allowed_max_num_feedbacks;
+} ruvd_enc_task_info_t;
+
+typedef struct ruvd_enc_session_init_s
+{
+ uint32_t aligned_picture_width;
+ uint32_t aligned_picture_height;
+ uint32_t padding_width;
+ uint32_t padding_height;
+ uint32_t pre_encode_mode;
+ uint32_t pre_encode_chroma_enabled;
+} ruvd_enc_session_init_t;
+
+typedef struct ruvd_enc_layer_control_s
+{
+ uint32_t max_num_temporal_layers;
+ uint32_t num_temporal_layers;
+} ruvd_enc_layer_control_t;
+
+typedef struct ruvd_enc_layer_select_s
+{
+ uint32_t temporal_layer_index;
+} ruvd_enc_layer_select_t;
+
+typedef struct ruvd_enc_hevc_slice_control_s
+{
+ uint32_t slice_control_mode;
+ union
+ {
+ struct
+ {
+ uint32_t num_ctbs_per_slice;
+ uint32_t num_ctbs_per_slice_segment;
+ } fixed_ctbs_per_slice;
+
+ struct
+ {
+ uint32_t num_bits_per_slice;
+ uint32_t num_bits_per_slice_segment;
+ } fixed_bits_per_slice;
+ };
+} ruvd_enc_hevc_slice_control_t;
+
+typedef struct ruvd_enc_hevc_spec_misc_s
+{
+ uint32_t log2_min_luma_coding_block_size_minus3;
+ uint32_t amp_disabled;
+ uint32_t strong_intra_smoothing_enabled;
+ uint32_t constrained_intra_pred_flag;
+ uint32_t cabac_init_flag;
+ uint32_t half_pel_enabled;
+ uint32_t quarter_pel_enabled;
+} ruvd_enc_hevc_spec_misc_t;
+
+typedef struct ruvd_enc_rate_ctl_session_init_s
+{
+ uint32_t rate_control_method;
+ uint32_t vbv_buffer_level;
+} ruvd_enc_rate_ctl_session_init_t;
+
+typedef struct ruvd_enc_rate_ctl_layer_init_s
+{
+ uint32_t target_bit_rate;
+ uint32_t peak_bit_rate;
+ uint32_t frame_rate_num;
+ uint32_t frame_rate_den;
+ uint32_t vbv_buffer_size;
+ uint32_t avg_target_bits_per_picture;
+ uint32_t peak_bits_per_picture_integer;
+ uint32_t peak_bits_per_picture_fractional;
+} ruvd_enc_rate_ctl_layer_init_t;
+
+typedef struct ruvd_enc_rate_ctl_per_picture_s
+{
+ uint32_t qp;
+ uint32_t min_qp_app;
+ uint32_t max_qp_app;
+ uint32_t max_au_size;
+ uint32_t enabled_filler_data;
+ uint32_t skip_frame_enable;
+ uint32_t enforce_hrd;
+} ruvd_enc_rate_ctl_per_picture_t;
+
+typedef struct ruvd_enc_quality_params_s
+{
+ uint32_t vbaq_mode;
+ uint32_t scene_change_sensitivity;
+ uint32_t scene_change_min_idr_interval;
+} ruvd_enc_quality_params_t;
+
+typedef struct ruvd_enc_direct_output_nalu_s
+{
+ uint32_t type;
+ uint32_t size;
+ uint32_t data[1];
+} ruvd_enc_direct_output_nalu_t;
+
+typedef struct ruvd_enc_slice_header_s
+{
+ uint32_t
+ bitstream_template
+ [RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS];
+ struct
+ {
+ uint32_t instruction;
+ uint32_t num_bits;
+ } instructions[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS];
+} ruvd_enc_slice_header_t;
+
+typedef struct ruvd_enc_encode_params_s
+{
+ uint32_t pic_type;
+ uint32_t allowed_max_bitstream_size;
+ uint32_t input_picture_luma_address_hi;
+ uint32_t input_picture_luma_address_lo;
+ uint32_t input_picture_chroma_address_hi;
+ uint32_t input_picture_chroma_address_lo;
+ uint32_t input_pic_luma_pitch;
+ uint32_t input_pic_chroma_pitch;
+ union
+ {
+ uint32_t input_pic_addr_mode;
+ uint32_t reserved;
+ };
+ union
+ {
+ uint32_t input_pic_array_mode;
+ uint32_t input_pic_swizzle_mode;
+ };
+ uint32_t reference_picture_index;
+ uint32_t reconstructed_picture_index;
+} ruvd_enc_encode_params_t;
+
+typedef struct ruvd_enc_hevc_deblocking_filter_s
+{
+ uint32_t loop_filter_across_slices_enabled;
+ int32_t deblocking_filter_disabled;
+ int32_t beta_offset_div2;
+ int32_t tc_offset_div2;
+ int32_t cb_qp_offset;
+ int32_t cr_qp_offset;
+} ruvd_enc_hevc_deblocking_filter_t;
+
+typedef struct ruvd_enc_intra_refresh_s
+{
+ uint32_t intra_refresh_mode;
+ uint32_t offset;
+ uint32_t region_size;
+} ruvd_enc_intra_refresh_t;
+
+typedef struct ruvd_enc_reconstructed_picture_s
+{
+ uint32_t luma_offset;
+ uint32_t chroma_offset;
+} ruvd_enc_reconstructed_picture_t;
+
+typedef struct ruvd_enc_encode_context_buffer_s
+{
+ uint32_t encode_context_address_hi;
+ uint32_t encode_context_address_lo;
+ union
+ {
+ uint32_t addr_mode;
+ uint32_t reserved;
+ };
+ union
+ {
+ uint32_t array_mode;
+ uint32_t swizzle_mode;
+ };
+ uint32_t rec_luma_pitch;
+ uint32_t rec_chroma_pitch;
+ uint32_t num_reconstructed_pictures;
+ ruvd_enc_reconstructed_picture_t
+ reconstructed_pictures[RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES];
+ uint32_t pre_encode_picture_luma_pitch;
+ uint32_t pre_encode_picture_chroma_pitch;
+ ruvd_enc_reconstructed_picture_t
+ pre_encode_reconstructed_pictures
+ [RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES];
+ ruvd_enc_reconstructed_picture_t pre_encode_input_picture;
+} ruvd_enc_encode_context_buffer_t;
+
+typedef struct ruvd_enc_video_bitstream_buffer_s
+{
+ uint32_t mode;
+ uint32_t video_bitstream_buffer_address_hi;
+ uint32_t video_bitstream_buffer_address_lo;
+ uint32_t video_bitstream_buffer_size;
+ uint32_t video_bitstream_data_offset;
+} ruvd_enc_video_bitstream_buffer_t;
+
+typedef struct ruvd_enc_feedback_buffer_s
+{
+ uint32_t mode;
+ uint32_t feedback_buffer_address_hi;
+ uint32_t feedback_buffer_address_lo;
+ uint32_t feedback_buffer_size;
+ uint32_t feedback_data_size;
+} ruvd_enc_feedback_buffer_t;
+
+typedef void (*radeon_uvd_enc_get_buffer) (struct pipe_resource * resource,
+ struct pb_buffer ** handle,
+ struct radeon_surf ** surface);
+
+struct pipe_video_codec *radeon_uvd_create_encoder(struct pipe_context
+ *context,
+ const struct
+ pipe_video_codec *templat,
+ struct radeon_winsys *ws,
+ radeon_uvd_enc_get_buffer
+ get_buffer);
+
+struct radeon_uvd_enc_pic
+{
+ enum pipe_h265_enc_picture_type picture_type;
+
+ unsigned frame_num;
+ unsigned pic_order_cnt;
+ unsigned pic_order_cnt_type;
+ unsigned ref_idx_l0;
+ unsigned ref_idx_l1;
+ unsigned crop_left;
+ unsigned crop_right;
+ unsigned crop_top;
+ unsigned crop_bottom;
+ unsigned general_tier_flag;
+ unsigned general_profile_idc;
+ unsigned general_level_idc;
+ unsigned max_poc;
+ unsigned log2_max_poc;
+ unsigned chroma_format_idc;
+ unsigned pic_width_in_luma_samples;
+ unsigned pic_height_in_luma_samples;
+ unsigned log2_diff_max_min_luma_coding_block_size;
+ unsigned log2_min_transform_block_size_minus2;
+ unsigned log2_diff_max_min_transform_block_size;
+ unsigned max_transform_hierarchy_depth_inter;
+ unsigned max_transform_hierarchy_depth_intra;
+ unsigned log2_parallel_merge_level_minus2;
+ unsigned bit_depth_luma_minus8;
+ unsigned bit_depth_chroma_minus8;
+ unsigned nal_unit_type;
+ unsigned max_num_merge_cand;
+
+ bool not_referenced;
+ bool is_idr;
+ bool is_even_frame;
+ bool sample_adaptive_offset_enabled_flag;
+ bool pcm_enabled_flag;
+ bool sps_temporal_mvp_enabled_flag;
+
+ ruvd_enc_task_info_t task_info;
+ ruvd_enc_session_init_t session_init;
+ ruvd_enc_layer_control_t layer_ctrl;
+ ruvd_enc_layer_select_t layer_sel;
+ ruvd_enc_hevc_slice_control_t hevc_slice_ctrl;
+ ruvd_enc_hevc_spec_misc_t hevc_spec_misc;
+ ruvd_enc_rate_ctl_session_init_t rc_session_init;
+ ruvd_enc_rate_ctl_layer_init_t rc_layer_init;
+ ruvd_enc_hevc_deblocking_filter_t hevc_deblock;
+ ruvd_enc_rate_ctl_per_picture_t rc_per_pic;
+ ruvd_enc_quality_params_t quality_params;
+ ruvd_enc_encode_context_buffer_t ctx_buf;
+ ruvd_enc_video_bitstream_buffer_t bit_buf;
+ ruvd_enc_feedback_buffer_t fb_buf;
+ ruvd_enc_intra_refresh_t intra_ref;
+ ruvd_enc_encode_params_t enc_params;
+};
+
+struct radeon_uvd_encoder
+{
+ struct pipe_video_codec base;
+
+ void (*begin) (struct radeon_uvd_encoder * enc,
+ struct pipe_picture_desc * pic);
+ void (*encode) (struct radeon_uvd_encoder * enc);
+ void (*destroy) (struct radeon_uvd_encoder * enc);
+
+ unsigned stream_handle;
+
+ struct pipe_screen *screen;
+ struct radeon_winsys *ws;
+ struct radeon_winsys_cs *cs;
+
+ radeon_uvd_enc_get_buffer get_buffer;
+
+ struct pb_buffer *handle;
+ struct radeon_surf *luma;
+ struct radeon_surf *chroma;
+
+ struct pb_buffer *bs_handle;
+ unsigned bs_size;
+
+ unsigned cpb_num;
+
+ struct rvid_buffer *si;
+ struct rvid_buffer *fb;
+ struct rvid_buffer cpb;
+ struct radeon_uvd_enc_pic enc_pic;
+
+ unsigned shifter;
+ unsigned bits_in_shifter;
+ unsigned num_zeros;
+ unsigned byte_index;
+ unsigned bits_output;
+ uint32_t total_task_size;
+ uint32_t *p_task_size;
+
+ bool emulation_prevention;
+ bool need_feedback;
+};
+
+void radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc);
+bool si_radeon_uvd_enc_supported(struct si_screen *rscreen);
+
+#endif // _RADEON_UVD_ENC_H
--
2.7.4
Boyuan Zhang
2018-02-07 22:40:42 UTC
Permalink
Better to add it to Makefile.source and Meson in this patch. Other than
this,
Post by James Zhu
Add hevc encode hardware interface for UVD
---
src/gallium/drivers/radeon/radeon_uvd_enc.h | 471 ++++++++++++++++++++++++++++
1 file changed, 471 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.h
diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.h b/src/gallium/drivers/radeon/radeon_uvd_enc.h
new file mode 100644
index 0000000..1cca0d2
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc.h
@@ -0,0 +1,471 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef _RADEON_UVD_ENC_H
+#define _RADEON_UVD_ENC_H
+
+#define RENC_UVD_FW_INTERFACE_MAJOR_VERSION 1
+#define RENC_UVD_FW_INTERFACE_MINOR_VERSION 1
+
+#define RENC_UVD_IB_PARAM_SESSION_INFO 0x00000001
+#define RENC_UVD_IB_PARAM_TASK_INFO 0x00000002
+#define RENC_UVD_IB_PARAM_SESSION_INIT 0x00000003
+#define RENC_UVD_IB_PARAM_LAYER_CONTROL 0x00000004
+#define RENC_UVD_IB_PARAM_LAYER_SELECT 0x00000005
+#define RENC_UVD_IB_PARAM_SLICE_CONTROL 0x00000006
+#define RENC_UVD_IB_PARAM_SPEC_MISC 0x00000007
+#define RENC_UVD_IB_PARAM_RATE_CONTROL_SESSION_INIT 0x00000008
+#define RENC_UVD_IB_PARAM_RATE_CONTROL_LAYER_INIT 0x00000009
+#define RENC_UVD_IB_PARAM_RATE_CONTROL_PER_PICTURE 0x0000000a
+#define RENC_UVD_IB_PARAM_SLICE_HEADER 0x0000000b
+#define RENC_UVD_IB_PARAM_ENCODE_PARAMS 0x0000000c
+#define RENC_UVD_IB_PARAM_QUALITY_PARAMS 0x0000000d
+#define RENC_UVD_IB_PARAM_DEBLOCKING_FILTER 0x0000000e
+#define RENC_UVD_IB_PARAM_INTRA_REFRESH 0x0000000f
+#define RENC_UVD_IB_PARAM_ENCODE_CONTEXT_BUFFER 0x00000010
+#define RENC_UVD_IB_PARAM_VIDEO_BITSTREAM_BUFFER 0x00000011
+#define RENC_UVD_IB_PARAM_FEEDBACK_BUFFER 0x00000012
+#define RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER 0x00000013
+#define RENC_UVD_IB_PARAM_FEEDBACK_BUFFER_ADDITIONAL 0x00000014
+
+#define RENC_UVD_IB_OP_INITIALIZE 0x08000001
+#define RENC_UVD_IB_OP_CLOSE_SESSION 0x08000002
+#define RENC_UVD_IB_OP_ENCODE 0x08000003
+#define RENC_UVD_IB_OP_INIT_RC 0x08000004
+#define RENC_UVD_IB_OP_INIT_RC_VBV_BUFFER_LEVEL 0x08000005
+#define RENC_UVD_IB_OP_SET_SPEED_ENCODING_MODE 0x08000006
+#define RENC_UVD_IB_OP_SET_BALANCE_ENCODING_MODE 0x08000007
+#define RENC_UVD_IB_OP_SET_QUALITY_ENCODING_MODE 0x08000008
+
+#define RENC_UVD_IF_MAJOR_VERSION_MASK 0xFFFF0000
+#define RENC_UVD_IF_MAJOR_VERSION_SHIFT 16
+#define RENC_UVD_IF_MINOR_VERSION_MASK 0x0000FFFF
+#define RENC_UVD_IF_MINOR_VERSION_SHIFT 0
+
+#define RENC_UVD_PREENCODE_MODE_NONE 0x00000000
+#define RENC_UVD_PREENCODE_MODE_1X 0x00000001
+#define RENC_UVD_PREENCODE_MODE_2X 0x00000002
+#define RENC_UVD_PREENCODE_MODE_4X 0x00000004
+
+#define RENC_UVD_SLICE_CONTROL_MODE_FIXED_CTBS 0x00000000
+#define RENC_UVD_SLICE_CONTROL_MODE_FIXED_BITS 0x00000001
+
+#define RENC_UVD_RATE_CONTROL_METHOD_NONE 0x00000000
+#define RENC_UVD_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR 0x00000001
+#define RENC_UVD_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR 0x00000002
+#define RENC_UVD_RATE_CONTROL_METHOD_CBR 0x00000003
+
+#define RENC_UVD_NALU_TYPE_AUD 0x00000001
+#define RENC_UVD_NALU_TYPE_VPS 0x00000002
+#define RENC_UVD_NALU_TYPE_SPS 0x00000003
+#define RENC_UVD_NALU_TYPE_PPS 0x00000004
+#define RENC_UVD_NALU_TYPE_END_OF_SEQUENCE 0x00000005
+
+#define RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS 16
+#define RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS 16
+
+#define RENC_UVD_HEADER_INSTRUCTION_END 0
+#define RENC_UVD_HEADER_INSTRUCTION_DEPENDENT_SLICE_END 1
+#define RENC_UVD_HEADER_INSTRUCTION_COPY 2
+#define RENC_UVD_HEADER_INSTRUCTION_FIRST_SLICE 3
+#define RENC_UVD_HEADER_INSTRUCTION_SLICE_SEGMENT 4
+#define RENC_UVD_HEADER_INSTRUCTION_SLICE_QP_DELTA 5
+
+#define RENC_UVD_PICTURE_TYPE_B 0
+#define RENC_UVD_PICTURE_TYPE_P 1
+#define RENC_UVD_PICTURE_TYPE_I 2
+#define RENC_UVD_PICTURE_TYPE_P_SKIP 3
+
+#define RENC_UVD_SWIZZLE_MODE_LINEAR 0
+#define RENC_UVD_SWIZZLE_MODE_256B_D 2
+#define RENC_UVD_SWIZZLE_MODE_4kB_D 6
+#define RENC_UVD_SWIZZLE_MODE_64kB_D 10
+#define RENC_UVD_INTRA_REFRESH_MODE_NONE 0
+#define RENC_UVD_INTRA_REFRESH_MODE_CTB_MB_ROWS 1
+#define RENC_UVD_INTRA_REFRESH_MODE_CTB_MB_COLUMNS 2
+
+#define RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES 34
+#define RENC_UVD_ADDR_MODE_LINEAR 0
+#define RENC_UVD_ADDR_MODE_PELE_8X8_1D 1
+#define RENC_UVD_ADDR_MODE_32AS8_88 2
+
+#define RENC_UVD_ARRAY_MODE_LINEAR 0
+#define RENC_UVD_ARRAY_MODE_PELE_8X8_1D 2
+#define RENC_UVD_ARRAY_MODE_2D_TILED_THIN1 4
+
+#define RENC_UVD_VIDEO_BITSTREAM_BUFFER_MODE_LINEAR 0
+#define RENC_UVD_VIDEO_BITSTREAM_BUFFER_MODE_CIRCULAR 1
+
+#define RENC_UVD_FEEDBACK_BUFFER_MODE_LINEAR 0
+#define RENC_UVD_FEEDBACK_BUFFER_MODE_CIRCULAR 1
+
+#define RENC_UVD_FEEDBACK_STATUS_OK 0x00000000
+#define RENC_UVD_FEEDBACK_STATUS_NOT_ENCODED 0x10000001
+
+typedef struct radeon_uvd_enc_feedback_s
+{
+ uint32_t task_id;
+ uint32_t first_in_task;
+ uint32_t last_in_task;
+ uint32_t status;
+ uint32_t has_bitstream;
+ uint32_t bitstream_offset;
+ uint32_t bitstream_size;
+ uint32_t enabled_filler_data;
+ uint32_t filler_data_size;
+ uint32_t extra_bytes;
+} radeon_uvd_enc_feedback_t;
+
+typedef struct ruvd_enc_session_info_s
+{
+ uint32_t reserved;
+ uint32_t interface_version;
+ uint32_t sw_context_address_hi;
+ uint32_t sw_context_address_lo;
+} ruvd_enc_session_info_t;
+
+typedef struct ruvd_enc_task_info_s
+{
+ uint32_t total_size_of_all_packages;
+ uint32_t task_id;
+ uint32_t allowed_max_num_feedbacks;
+} ruvd_enc_task_info_t;
+
+typedef struct ruvd_enc_session_init_s
+{
+ uint32_t aligned_picture_width;
+ uint32_t aligned_picture_height;
+ uint32_t padding_width;
+ uint32_t padding_height;
+ uint32_t pre_encode_mode;
+ uint32_t pre_encode_chroma_enabled;
+} ruvd_enc_session_init_t;
+
+typedef struct ruvd_enc_layer_control_s
+{
+ uint32_t max_num_temporal_layers;
+ uint32_t num_temporal_layers;
+} ruvd_enc_layer_control_t;
+
+typedef struct ruvd_enc_layer_select_s
+{
+ uint32_t temporal_layer_index;
+} ruvd_enc_layer_select_t;
+
+typedef struct ruvd_enc_hevc_slice_control_s
+{
+ uint32_t slice_control_mode;
+ union
+ {
+ struct
+ {
+ uint32_t num_ctbs_per_slice;
+ uint32_t num_ctbs_per_slice_segment;
+ } fixed_ctbs_per_slice;
+
+ struct
+ {
+ uint32_t num_bits_per_slice;
+ uint32_t num_bits_per_slice_segment;
+ } fixed_bits_per_slice;
+ };
+} ruvd_enc_hevc_slice_control_t;
+
+typedef struct ruvd_enc_hevc_spec_misc_s
+{
+ uint32_t log2_min_luma_coding_block_size_minus3;
+ uint32_t amp_disabled;
+ uint32_t strong_intra_smoothing_enabled;
+ uint32_t constrained_intra_pred_flag;
+ uint32_t cabac_init_flag;
+ uint32_t half_pel_enabled;
+ uint32_t quarter_pel_enabled;
+} ruvd_enc_hevc_spec_misc_t;
+
+typedef struct ruvd_enc_rate_ctl_session_init_s
+{
+ uint32_t rate_control_method;
+ uint32_t vbv_buffer_level;
+} ruvd_enc_rate_ctl_session_init_t;
+
+typedef struct ruvd_enc_rate_ctl_layer_init_s
+{
+ uint32_t target_bit_rate;
+ uint32_t peak_bit_rate;
+ uint32_t frame_rate_num;
+ uint32_t frame_rate_den;
+ uint32_t vbv_buffer_size;
+ uint32_t avg_target_bits_per_picture;
+ uint32_t peak_bits_per_picture_integer;
+ uint32_t peak_bits_per_picture_fractional;
+} ruvd_enc_rate_ctl_layer_init_t;
+
+typedef struct ruvd_enc_rate_ctl_per_picture_s
+{
+ uint32_t qp;
+ uint32_t min_qp_app;
+ uint32_t max_qp_app;
+ uint32_t max_au_size;
+ uint32_t enabled_filler_data;
+ uint32_t skip_frame_enable;
+ uint32_t enforce_hrd;
+} ruvd_enc_rate_ctl_per_picture_t;
+
+typedef struct ruvd_enc_quality_params_s
+{
+ uint32_t vbaq_mode;
+ uint32_t scene_change_sensitivity;
+ uint32_t scene_change_min_idr_interval;
+} ruvd_enc_quality_params_t;
+
+typedef struct ruvd_enc_direct_output_nalu_s
+{
+ uint32_t type;
+ uint32_t size;
+ uint32_t data[1];
+} ruvd_enc_direct_output_nalu_t;
+
+typedef struct ruvd_enc_slice_header_s
+{
+ uint32_t
+ bitstream_template
+ [RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS];
+ struct
+ {
+ uint32_t instruction;
+ uint32_t num_bits;
+ } instructions[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS];
+} ruvd_enc_slice_header_t;
+
+typedef struct ruvd_enc_encode_params_s
+{
+ uint32_t pic_type;
+ uint32_t allowed_max_bitstream_size;
+ uint32_t input_picture_luma_address_hi;
+ uint32_t input_picture_luma_address_lo;
+ uint32_t input_picture_chroma_address_hi;
+ uint32_t input_picture_chroma_address_lo;
+ uint32_t input_pic_luma_pitch;
+ uint32_t input_pic_chroma_pitch;
+ union
+ {
+ uint32_t input_pic_addr_mode;
+ uint32_t reserved;
+ };
+ union
+ {
+ uint32_t input_pic_array_mode;
+ uint32_t input_pic_swizzle_mode;
+ };
+ uint32_t reference_picture_index;
+ uint32_t reconstructed_picture_index;
+} ruvd_enc_encode_params_t;
+
+typedef struct ruvd_enc_hevc_deblocking_filter_s
+{
+ uint32_t loop_filter_across_slices_enabled;
+ int32_t deblocking_filter_disabled;
+ int32_t beta_offset_div2;
+ int32_t tc_offset_div2;
+ int32_t cb_qp_offset;
+ int32_t cr_qp_offset;
+} ruvd_enc_hevc_deblocking_filter_t;
+
+typedef struct ruvd_enc_intra_refresh_s
+{
+ uint32_t intra_refresh_mode;
+ uint32_t offset;
+ uint32_t region_size;
+} ruvd_enc_intra_refresh_t;
+
+typedef struct ruvd_enc_reconstructed_picture_s
+{
+ uint32_t luma_offset;
+ uint32_t chroma_offset;
+} ruvd_enc_reconstructed_picture_t;
+
+typedef struct ruvd_enc_encode_context_buffer_s
+{
+ uint32_t encode_context_address_hi;
+ uint32_t encode_context_address_lo;
+ union
+ {
+ uint32_t addr_mode;
+ uint32_t reserved;
+ };
+ union
+ {
+ uint32_t array_mode;
+ uint32_t swizzle_mode;
+ };
+ uint32_t rec_luma_pitch;
+ uint32_t rec_chroma_pitch;
+ uint32_t num_reconstructed_pictures;
+ ruvd_enc_reconstructed_picture_t
+ reconstructed_pictures[RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES];
+ uint32_t pre_encode_picture_luma_pitch;
+ uint32_t pre_encode_picture_chroma_pitch;
+ ruvd_enc_reconstructed_picture_t
+ pre_encode_reconstructed_pictures
+ [RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES];
+ ruvd_enc_reconstructed_picture_t pre_encode_input_picture;
+} ruvd_enc_encode_context_buffer_t;
+
+typedef struct ruvd_enc_video_bitstream_buffer_s
+{
+ uint32_t mode;
+ uint32_t video_bitstream_buffer_address_hi;
+ uint32_t video_bitstream_buffer_address_lo;
+ uint32_t video_bitstream_buffer_size;
+ uint32_t video_bitstream_data_offset;
+} ruvd_enc_video_bitstream_buffer_t;
+
+typedef struct ruvd_enc_feedback_buffer_s
+{
+ uint32_t mode;
+ uint32_t feedback_buffer_address_hi;
+ uint32_t feedback_buffer_address_lo;
+ uint32_t feedback_buffer_size;
+ uint32_t feedback_data_size;
+} ruvd_enc_feedback_buffer_t;
+
+typedef void (*radeon_uvd_enc_get_buffer) (struct pipe_resource * resource,
+ struct pb_buffer ** handle,
+ struct radeon_surf ** surface);
+
+struct pipe_video_codec *radeon_uvd_create_encoder(struct pipe_context
+ *context,
+ const struct
+ pipe_video_codec *templat,
+ struct radeon_winsys *ws,
+ radeon_uvd_enc_get_buffer
+ get_buffer);
+
+struct radeon_uvd_enc_pic
+{
+ enum pipe_h265_enc_picture_type picture_type;
+
+ unsigned frame_num;
+ unsigned pic_order_cnt;
+ unsigned pic_order_cnt_type;
+ unsigned ref_idx_l0;
+ unsigned ref_idx_l1;
+ unsigned crop_left;
+ unsigned crop_right;
+ unsigned crop_top;
+ unsigned crop_bottom;
+ unsigned general_tier_flag;
+ unsigned general_profile_idc;
+ unsigned general_level_idc;
+ unsigned max_poc;
+ unsigned log2_max_poc;
+ unsigned chroma_format_idc;
+ unsigned pic_width_in_luma_samples;
+ unsigned pic_height_in_luma_samples;
+ unsigned log2_diff_max_min_luma_coding_block_size;
+ unsigned log2_min_transform_block_size_minus2;
+ unsigned log2_diff_max_min_transform_block_size;
+ unsigned max_transform_hierarchy_depth_inter;
+ unsigned max_transform_hierarchy_depth_intra;
+ unsigned log2_parallel_merge_level_minus2;
+ unsigned bit_depth_luma_minus8;
+ unsigned bit_depth_chroma_minus8;
+ unsigned nal_unit_type;
+ unsigned max_num_merge_cand;
+
+ bool not_referenced;
+ bool is_idr;
+ bool is_even_frame;
+ bool sample_adaptive_offset_enabled_flag;
+ bool pcm_enabled_flag;
+ bool sps_temporal_mvp_enabled_flag;
+
+ ruvd_enc_task_info_t task_info;
+ ruvd_enc_session_init_t session_init;
+ ruvd_enc_layer_control_t layer_ctrl;
+ ruvd_enc_layer_select_t layer_sel;
+ ruvd_enc_hevc_slice_control_t hevc_slice_ctrl;
+ ruvd_enc_hevc_spec_misc_t hevc_spec_misc;
+ ruvd_enc_rate_ctl_session_init_t rc_session_init;
+ ruvd_enc_rate_ctl_layer_init_t rc_layer_init;
+ ruvd_enc_hevc_deblocking_filter_t hevc_deblock;
+ ruvd_enc_rate_ctl_per_picture_t rc_per_pic;
+ ruvd_enc_quality_params_t quality_params;
+ ruvd_enc_encode_context_buffer_t ctx_buf;
+ ruvd_enc_video_bitstream_buffer_t bit_buf;
+ ruvd_enc_feedback_buffer_t fb_buf;
+ ruvd_enc_intra_refresh_t intra_ref;
+ ruvd_enc_encode_params_t enc_params;
+};
+
+struct radeon_uvd_encoder
+{
+ struct pipe_video_codec base;
+
+ void (*begin) (struct radeon_uvd_encoder * enc,
+ struct pipe_picture_desc * pic);
+ void (*encode) (struct radeon_uvd_encoder * enc);
+ void (*destroy) (struct radeon_uvd_encoder * enc);
+
+ unsigned stream_handle;
+
+ struct pipe_screen *screen;
+ struct radeon_winsys *ws;
+ struct radeon_winsys_cs *cs;
+
+ radeon_uvd_enc_get_buffer get_buffer;
+
+ struct pb_buffer *handle;
+ struct radeon_surf *luma;
+ struct radeon_surf *chroma;
+
+ struct pb_buffer *bs_handle;
+ unsigned bs_size;
+
+ unsigned cpb_num;
+
+ struct rvid_buffer *si;
+ struct rvid_buffer *fb;
+ struct rvid_buffer cpb;
+ struct radeon_uvd_enc_pic enc_pic;
+
+ unsigned shifter;
+ unsigned bits_in_shifter;
+ unsigned num_zeros;
+ unsigned byte_index;
+ unsigned bits_output;
+ uint32_t total_task_size;
+ uint32_t *p_task_size;
+
+ bool emulation_prevention;
+ bool need_feedback;
+};
+
+void radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc);
+bool si_radeon_uvd_enc_supported(struct si_screen *rscreen);
+
+#endif // _RADEON_UVD_ENC_H
James Zhu
2018-02-09 20:30:57 UTC
Permalink
In order to keep the patch order for tracking, I will make this change in the final update.
Post by Boyuan Zhang
Better to add it to Makefile.source and Meson in this patch. Other
than this,
Post by James Zhu
Add hevc encode hardware interface for UVD
---
src/gallium/drivers/radeon/radeon_uvd_enc.h | 471 ++++++++++++++++++++++++++++
1 file changed, 471 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.h
diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.h b/src/gallium/drivers/radeon/radeon_uvd_enc.h
new file mode 100644
index 0000000..1cca0d2
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc.h
@@ -0,0 +1,471 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef _RADEON_UVD_ENC_H
+#define _RADEON_UVD_ENC_H
+
+#define RENC_UVD_FW_INTERFACE_MAJOR_VERSION 1
+#define RENC_UVD_FW_INTERFACE_MINOR_VERSION 1
+
+#define RENC_UVD_IB_PARAM_SESSION_INFO 0x00000001
+#define RENC_UVD_IB_PARAM_TASK_INFO 0x00000002
+#define RENC_UVD_IB_PARAM_SESSION_INIT 0x00000003
+#define RENC_UVD_IB_PARAM_LAYER_CONTROL 0x00000004
+#define RENC_UVD_IB_PARAM_LAYER_SELECT 0x00000005
+#define RENC_UVD_IB_PARAM_SLICE_CONTROL 0x00000006
+#define RENC_UVD_IB_PARAM_SPEC_MISC 0x00000007
+#define RENC_UVD_IB_PARAM_RATE_CONTROL_SESSION_INIT 0x00000008
+#define RENC_UVD_IB_PARAM_RATE_CONTROL_LAYER_INIT 0x00000009
+#define RENC_UVD_IB_PARAM_RATE_CONTROL_PER_PICTURE 0x0000000a
+#define RENC_UVD_IB_PARAM_SLICE_HEADER 0x0000000b
+#define RENC_UVD_IB_PARAM_ENCODE_PARAMS 0x0000000c
+#define RENC_UVD_IB_PARAM_QUALITY_PARAMS 0x0000000d
+#define RENC_UVD_IB_PARAM_DEBLOCKING_FILTER 0x0000000e
+#define RENC_UVD_IB_PARAM_INTRA_REFRESH 0x0000000f
+#define RENC_UVD_IB_PARAM_ENCODE_CONTEXT_BUFFER 0x00000010
+#define RENC_UVD_IB_PARAM_VIDEO_BITSTREAM_BUFFER 0x00000011
+#define RENC_UVD_IB_PARAM_FEEDBACK_BUFFER 0x00000012
+#define RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER 0x00000013
+#define RENC_UVD_IB_PARAM_FEEDBACK_BUFFER_ADDITIONAL 0x00000014
+
+#define RENC_UVD_IB_OP_INITIALIZE 0x08000001
+#define RENC_UVD_IB_OP_CLOSE_SESSION 0x08000002
+#define RENC_UVD_IB_OP_ENCODE 0x08000003
+#define RENC_UVD_IB_OP_INIT_RC 0x08000004
+#define RENC_UVD_IB_OP_INIT_RC_VBV_BUFFER_LEVEL 0x08000005
+#define RENC_UVD_IB_OP_SET_SPEED_ENCODING_MODE 0x08000006
+#define RENC_UVD_IB_OP_SET_BALANCE_ENCODING_MODE 0x08000007
+#define RENC_UVD_IB_OP_SET_QUALITY_ENCODING_MODE 0x08000008
+
+#define RENC_UVD_IF_MAJOR_VERSION_MASK 0xFFFF0000
+#define RENC_UVD_IF_MAJOR_VERSION_SHIFT 16
+#define RENC_UVD_IF_MINOR_VERSION_MASK 0x0000FFFF
+#define RENC_UVD_IF_MINOR_VERSION_SHIFT 0
+
+#define RENC_UVD_PREENCODE_MODE_NONE 0x00000000
+#define RENC_UVD_PREENCODE_MODE_1X 0x00000001
+#define RENC_UVD_PREENCODE_MODE_2X 0x00000002
+#define RENC_UVD_PREENCODE_MODE_4X 0x00000004
+
+#define RENC_UVD_SLICE_CONTROL_MODE_FIXED_CTBS 0x00000000
+#define RENC_UVD_SLICE_CONTROL_MODE_FIXED_BITS 0x00000001
+
+#define RENC_UVD_RATE_CONTROL_METHOD_NONE 0x00000000
+#define RENC_UVD_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR 0x00000001
+#define RENC_UVD_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR 0x00000002
+#define RENC_UVD_RATE_CONTROL_METHOD_CBR 0x00000003
+
+#define RENC_UVD_NALU_TYPE_AUD 0x00000001
+#define RENC_UVD_NALU_TYPE_VPS 0x00000002
+#define RENC_UVD_NALU_TYPE_SPS 0x00000003
+#define RENC_UVD_NALU_TYPE_PPS 0x00000004
+#define RENC_UVD_NALU_TYPE_END_OF_SEQUENCE 0x00000005
+
+#define RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS 16
+#define RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS 16
+
+#define RENC_UVD_HEADER_INSTRUCTION_END 0
+#define RENC_UVD_HEADER_INSTRUCTION_DEPENDENT_SLICE_END 1
+#define RENC_UVD_HEADER_INSTRUCTION_COPY 2
+#define RENC_UVD_HEADER_INSTRUCTION_FIRST_SLICE 3
+#define RENC_UVD_HEADER_INSTRUCTION_SLICE_SEGMENT 4
+#define RENC_UVD_HEADER_INSTRUCTION_SLICE_QP_DELTA 5
+
+#define RENC_UVD_PICTURE_TYPE_B 0
+#define RENC_UVD_PICTURE_TYPE_P 1
+#define RENC_UVD_PICTURE_TYPE_I 2
+#define RENC_UVD_PICTURE_TYPE_P_SKIP 3
+
+#define RENC_UVD_SWIZZLE_MODE_LINEAR 0
+#define RENC_UVD_SWIZZLE_MODE_256B_D 2
+#define RENC_UVD_SWIZZLE_MODE_4kB_D 6
+#define RENC_UVD_SWIZZLE_MODE_64kB_D 10
+#define RENC_UVD_INTRA_REFRESH_MODE_NONE 0
+#define RENC_UVD_INTRA_REFRESH_MODE_CTB_MB_ROWS 1
+#define RENC_UVD_INTRA_REFRESH_MODE_CTB_MB_COLUMNS 2
+
+#define RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES 34
+#define RENC_UVD_ADDR_MODE_LINEAR 0
+#define RENC_UVD_ADDR_MODE_PELE_8X8_1D 1
+#define RENC_UVD_ADDR_MODE_32AS8_88 2
+
+#define RENC_UVD_ARRAY_MODE_LINEAR 0
+#define RENC_UVD_ARRAY_MODE_PELE_8X8_1D 2
+#define RENC_UVD_ARRAY_MODE_2D_TILED_THIN1 4
+
+#define RENC_UVD_VIDEO_BITSTREAM_BUFFER_MODE_LINEAR 0
+#define RENC_UVD_VIDEO_BITSTREAM_BUFFER_MODE_CIRCULAR 1
+
+#define RENC_UVD_FEEDBACK_BUFFER_MODE_LINEAR 0
+#define RENC_UVD_FEEDBACK_BUFFER_MODE_CIRCULAR 1
+
+#define RENC_UVD_FEEDBACK_STATUS_OK 0x00000000
+#define RENC_UVD_FEEDBACK_STATUS_NOT_ENCODED 0x10000001
+
+typedef struct radeon_uvd_enc_feedback_s
+{
+ uint32_t task_id;
+ uint32_t first_in_task;
+ uint32_t last_in_task;
+ uint32_t status;
+ uint32_t has_bitstream;
+ uint32_t bitstream_offset;
+ uint32_t bitstream_size;
+ uint32_t enabled_filler_data;
+ uint32_t filler_data_size;
+ uint32_t extra_bytes;
+} radeon_uvd_enc_feedback_t;
+
+typedef struct ruvd_enc_session_info_s
+{
+ uint32_t reserved;
+ uint32_t interface_version;
+ uint32_t sw_context_address_hi;
+ uint32_t sw_context_address_lo;
+} ruvd_enc_session_info_t;
+
+typedef struct ruvd_enc_task_info_s
+{
+ uint32_t total_size_of_all_packages;
+ uint32_t task_id;
+ uint32_t allowed_max_num_feedbacks;
+} ruvd_enc_task_info_t;
+
+typedef struct ruvd_enc_session_init_s
+{
+ uint32_t aligned_picture_width;
+ uint32_t aligned_picture_height;
+ uint32_t padding_width;
+ uint32_t padding_height;
+ uint32_t pre_encode_mode;
+ uint32_t pre_encode_chroma_enabled;
+} ruvd_enc_session_init_t;
+
+typedef struct ruvd_enc_layer_control_s
+{
+ uint32_t max_num_temporal_layers;
+ uint32_t num_temporal_layers;
+} ruvd_enc_layer_control_t;
+
+typedef struct ruvd_enc_layer_select_s
+{
+ uint32_t temporal_layer_index;
+} ruvd_enc_layer_select_t;
+
+typedef struct ruvd_enc_hevc_slice_control_s
+{
+ uint32_t slice_control_mode;
+ union
+ {
+ struct
+ {
+ uint32_t num_ctbs_per_slice;
+ uint32_t num_ctbs_per_slice_segment;
+ } fixed_ctbs_per_slice;
+
+ struct
+ {
+ uint32_t num_bits_per_slice;
+ uint32_t num_bits_per_slice_segment;
+ } fixed_bits_per_slice;
+ };
+} ruvd_enc_hevc_slice_control_t;
+
+typedef struct ruvd_enc_hevc_spec_misc_s
+{
+ uint32_t log2_min_luma_coding_block_size_minus3;
+ uint32_t amp_disabled;
+ uint32_t strong_intra_smoothing_enabled;
+ uint32_t constrained_intra_pred_flag;
+ uint32_t cabac_init_flag;
+ uint32_t half_pel_enabled;
+ uint32_t quarter_pel_enabled;
+} ruvd_enc_hevc_spec_misc_t;
+
+typedef struct ruvd_enc_rate_ctl_session_init_s
+{
+ uint32_t rate_control_method;
+ uint32_t vbv_buffer_level;
+} ruvd_enc_rate_ctl_session_init_t;
+
+typedef struct ruvd_enc_rate_ctl_layer_init_s
+{
+ uint32_t target_bit_rate;
+ uint32_t peak_bit_rate;
+ uint32_t frame_rate_num;
+ uint32_t frame_rate_den;
+ uint32_t vbv_buffer_size;
+ uint32_t avg_target_bits_per_picture;
+ uint32_t peak_bits_per_picture_integer;
+ uint32_t peak_bits_per_picture_fractional;
+} ruvd_enc_rate_ctl_layer_init_t;
+
+typedef struct ruvd_enc_rate_ctl_per_picture_s
+{
+ uint32_t qp;
+ uint32_t min_qp_app;
+ uint32_t max_qp_app;
+ uint32_t max_au_size;
+ uint32_t enabled_filler_data;
+ uint32_t skip_frame_enable;
+ uint32_t enforce_hrd;
+} ruvd_enc_rate_ctl_per_picture_t;
+
+typedef struct ruvd_enc_quality_params_s
+{
+ uint32_t vbaq_mode;
+ uint32_t scene_change_sensitivity;
+ uint32_t scene_change_min_idr_interval;
+} ruvd_enc_quality_params_t;
+
+typedef struct ruvd_enc_direct_output_nalu_s
+{
+ uint32_t type;
+ uint32_t size;
+ uint32_t data[1];
+} ruvd_enc_direct_output_nalu_t;
+
+typedef struct ruvd_enc_slice_header_s
+{
+ uint32_t
+ bitstream_template
+ [RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS];
+ struct
+ {
+ uint32_t instruction;
+ uint32_t num_bits;
+ } instructions[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS];
+} ruvd_enc_slice_header_t;
+
+typedef struct ruvd_enc_encode_params_s
+{
+ uint32_t pic_type;
+ uint32_t allowed_max_bitstream_size;
+ uint32_t input_picture_luma_address_hi;
+ uint32_t input_picture_luma_address_lo;
+ uint32_t input_picture_chroma_address_hi;
+ uint32_t input_picture_chroma_address_lo;
+ uint32_t input_pic_luma_pitch;
+ uint32_t input_pic_chroma_pitch;
+ union
+ {
+ uint32_t input_pic_addr_mode;
+ uint32_t reserved;
+ };
+ union
+ {
+ uint32_t input_pic_array_mode;
+ uint32_t input_pic_swizzle_mode;
+ };
+ uint32_t reference_picture_index;
+ uint32_t reconstructed_picture_index;
+} ruvd_enc_encode_params_t;
+
+typedef struct ruvd_enc_hevc_deblocking_filter_s
+{
+ uint32_t loop_filter_across_slices_enabled;
+ int32_t deblocking_filter_disabled;
+ int32_t beta_offset_div2;
+ int32_t tc_offset_div2;
+ int32_t cb_qp_offset;
+ int32_t cr_qp_offset;
+} ruvd_enc_hevc_deblocking_filter_t;
+
+typedef struct ruvd_enc_intra_refresh_s
+{
+ uint32_t intra_refresh_mode;
+ uint32_t offset;
+ uint32_t region_size;
+} ruvd_enc_intra_refresh_t;
+
+typedef struct ruvd_enc_reconstructed_picture_s
+{
+ uint32_t luma_offset;
+ uint32_t chroma_offset;
+} ruvd_enc_reconstructed_picture_t;
+
+typedef struct ruvd_enc_encode_context_buffer_s
+{
+ uint32_t encode_context_address_hi;
+ uint32_t encode_context_address_lo;
+ union
+ {
+ uint32_t addr_mode;
+ uint32_t reserved;
+ };
+ union
+ {
+ uint32_t array_mode;
+ uint32_t swizzle_mode;
+ };
+ uint32_t rec_luma_pitch;
+ uint32_t rec_chroma_pitch;
+ uint32_t num_reconstructed_pictures;
+ ruvd_enc_reconstructed_picture_t
+ reconstructed_pictures[RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES];
+ uint32_t pre_encode_picture_luma_pitch;
+ uint32_t pre_encode_picture_chroma_pitch;
+ ruvd_enc_reconstructed_picture_t
+ pre_encode_reconstructed_pictures
+ [RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES];
+ ruvd_enc_reconstructed_picture_t pre_encode_input_picture;
+} ruvd_enc_encode_context_buffer_t;
+
+typedef struct ruvd_enc_video_bitstream_buffer_s
+{
+ uint32_t mode;
+ uint32_t video_bitstream_buffer_address_hi;
+ uint32_t video_bitstream_buffer_address_lo;
+ uint32_t video_bitstream_buffer_size;
+ uint32_t video_bitstream_data_offset;
+} ruvd_enc_video_bitstream_buffer_t;
+
+typedef struct ruvd_enc_feedback_buffer_s
+{
+ uint32_t mode;
+ uint32_t feedback_buffer_address_hi;
+ uint32_t feedback_buffer_address_lo;
+ uint32_t feedback_buffer_size;
+ uint32_t feedback_data_size;
+} ruvd_enc_feedback_buffer_t;
+
+typedef void (*radeon_uvd_enc_get_buffer) (struct pipe_resource * resource,
+ struct pb_buffer ** handle,
+ struct radeon_surf ** surface);
+
+struct pipe_video_codec *radeon_uvd_create_encoder(struct pipe_context
+ *context,
+ const struct
+ pipe_video_codec *templat,
+ struct radeon_winsys *ws,
+ radeon_uvd_enc_get_buffer
+ get_buffer);
+
+struct radeon_uvd_enc_pic
+{
+ enum pipe_h265_enc_picture_type picture_type;
+
+ unsigned frame_num;
+ unsigned pic_order_cnt;
+ unsigned pic_order_cnt_type;
+ unsigned ref_idx_l0;
+ unsigned ref_idx_l1;
+ unsigned crop_left;
+ unsigned crop_right;
+ unsigned crop_top;
+ unsigned crop_bottom;
+ unsigned general_tier_flag;
+ unsigned general_profile_idc;
+ unsigned general_level_idc;
+ unsigned max_poc;
+ unsigned log2_max_poc;
+ unsigned chroma_format_idc;
+ unsigned pic_width_in_luma_samples;
+ unsigned pic_height_in_luma_samples;
+ unsigned log2_diff_max_min_luma_coding_block_size;
+ unsigned log2_min_transform_block_size_minus2;
+ unsigned log2_diff_max_min_transform_block_size;
+ unsigned max_transform_hierarchy_depth_inter;
+ unsigned max_transform_hierarchy_depth_intra;
+ unsigned log2_parallel_merge_level_minus2;
+ unsigned bit_depth_luma_minus8;
+ unsigned bit_depth_chroma_minus8;
+ unsigned nal_unit_type;
+ unsigned max_num_merge_cand;
+
+ bool not_referenced;
+ bool is_idr;
+ bool is_even_frame;
+ bool sample_adaptive_offset_enabled_flag;
+ bool pcm_enabled_flag;
+ bool sps_temporal_mvp_enabled_flag;
+
+ ruvd_enc_task_info_t task_info;
+ ruvd_enc_session_init_t session_init;
+ ruvd_enc_layer_control_t layer_ctrl;
+ ruvd_enc_layer_select_t layer_sel;
+ ruvd_enc_hevc_slice_control_t hevc_slice_ctrl;
+ ruvd_enc_hevc_spec_misc_t hevc_spec_misc;
+ ruvd_enc_rate_ctl_session_init_t rc_session_init;
+ ruvd_enc_rate_ctl_layer_init_t rc_layer_init;
+ ruvd_enc_hevc_deblocking_filter_t hevc_deblock;
+ ruvd_enc_rate_ctl_per_picture_t rc_per_pic;
+ ruvd_enc_quality_params_t quality_params;
+ ruvd_enc_encode_context_buffer_t ctx_buf;
+ ruvd_enc_video_bitstream_buffer_t bit_buf;
+ ruvd_enc_feedback_buffer_t fb_buf;
+ ruvd_enc_intra_refresh_t intra_ref;
+ ruvd_enc_encode_params_t enc_params;
+};
+
+struct radeon_uvd_encoder
+{
+ struct pipe_video_codec base;
+
+ void (*begin) (struct radeon_uvd_encoder * enc,
+ struct pipe_picture_desc * pic);
+ void (*encode) (struct radeon_uvd_encoder * enc);
+ void (*destroy) (struct radeon_uvd_encoder * enc);
+
+ unsigned stream_handle;
+
+ struct pipe_screen *screen;
+ struct radeon_winsys *ws;
+ struct radeon_winsys_cs *cs;
+
+ radeon_uvd_enc_get_buffer get_buffer;
+
+ struct pb_buffer *handle;
+ struct radeon_surf *luma;
+ struct radeon_surf *chroma;
+
+ struct pb_buffer *bs_handle;
+ unsigned bs_size;
+
+ unsigned cpb_num;
+
+ struct rvid_buffer *si;
+ struct rvid_buffer *fb;
+ struct rvid_buffer cpb;
+ struct radeon_uvd_enc_pic enc_pic;
+
+ unsigned shifter;
+ unsigned bits_in_shifter;
+ unsigned num_zeros;
+ unsigned byte_index;
+ unsigned bits_output;
+ uint32_t total_task_size;
+ uint32_t *p_task_size;
+
+ bool emulation_prevention;
+ bool need_feedback;
+};
+
+void radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc);
+bool si_radeon_uvd_enc_supported(struct si_screen *rscreen);
+
+#endif // _RADEON_UVD_ENC_H
James Zhu
2018-02-09 20:34:54 UTC
Permalink
Add hevc encode hardware interface for UVD

Signed-off-by: James Zhu <***@amd.com>
---
src/gallium/drivers/radeon/radeon_uvd_enc.h | 471 ++++++++++++++++++++++++++++
1 file changed, 471 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.h

diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.h b/src/gallium/drivers/radeon/radeon_uvd_enc.h
new file mode 100644
index 0000000..ef9d37d
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc.h
@@ -0,0 +1,471 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef _RADEON_UVD_ENC_H
+#define _RADEON_UVD_ENC_H
+
+#define RENC_UVD_FW_INTERFACE_MAJOR_VERSION 1
+#define RENC_UVD_FW_INTERFACE_MINOR_VERSION 1
+
+#define RENC_UVD_IB_PARAM_SESSION_INFO 0x00000001
+#define RENC_UVD_IB_PARAM_TASK_INFO 0x00000002
+#define RENC_UVD_IB_PARAM_SESSION_INIT 0x00000003
+#define RENC_UVD_IB_PARAM_LAYER_CONTROL 0x00000004
+#define RENC_UVD_IB_PARAM_LAYER_SELECT 0x00000005
+#define RENC_UVD_IB_PARAM_SLICE_CONTROL 0x00000006
+#define RENC_UVD_IB_PARAM_SPEC_MISC 0x00000007
+#define RENC_UVD_IB_PARAM_RATE_CONTROL_SESSION_INIT 0x00000008
+#define RENC_UVD_IB_PARAM_RATE_CONTROL_LAYER_INIT 0x00000009
+#define RENC_UVD_IB_PARAM_RATE_CONTROL_PER_PICTURE 0x0000000a
+#define RENC_UVD_IB_PARAM_SLICE_HEADER 0x0000000b
+#define RENC_UVD_IB_PARAM_ENCODE_PARAMS 0x0000000c
+#define RENC_UVD_IB_PARAM_QUALITY_PARAMS 0x0000000d
+#define RENC_UVD_IB_PARAM_DEBLOCKING_FILTER 0x0000000e
+#define RENC_UVD_IB_PARAM_INTRA_REFRESH 0x0000000f
+#define RENC_UVD_IB_PARAM_ENCODE_CONTEXT_BUFFER 0x00000010
+#define RENC_UVD_IB_PARAM_VIDEO_BITSTREAM_BUFFER 0x00000011
+#define RENC_UVD_IB_PARAM_FEEDBACK_BUFFER 0x00000012
+#define RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER 0x00000013
+#define RENC_UVD_IB_PARAM_FEEDBACK_BUFFER_ADDITIONAL 0x00000014
+
+#define RENC_UVD_IB_OP_INITIALIZE 0x08000001
+#define RENC_UVD_IB_OP_CLOSE_SESSION 0x08000002
+#define RENC_UVD_IB_OP_ENCODE 0x08000003
+#define RENC_UVD_IB_OP_INIT_RC 0x08000004
+#define RENC_UVD_IB_OP_INIT_RC_VBV_BUFFER_LEVEL 0x08000005
+#define RENC_UVD_IB_OP_SET_SPEED_ENCODING_MODE 0x08000006
+#define RENC_UVD_IB_OP_SET_BALANCE_ENCODING_MODE 0x08000007
+#define RENC_UVD_IB_OP_SET_QUALITY_ENCODING_MODE 0x08000008
+
+#define RENC_UVD_IF_MAJOR_VERSION_MASK 0xFFFF0000 /* selects bits 31..16 */
+#define RENC_UVD_IF_MAJOR_VERSION_SHIFT 16 /* position of the lowest bit covered by the major mask */
+#define RENC_UVD_IF_MINOR_VERSION_MASK 0x0000FFFF /* selects bits 15..0 */
+#define RENC_UVD_IF_MINOR_VERSION_SHIFT 0 /* NOTE(review): presumably these pack ruvd_enc_session_info_t.interface_version - confirm */
+
+#define RENC_UVD_PREENCODE_MODE_NONE 0x00000000
+#define RENC_UVD_PREENCODE_MODE_1X 0x00000001
+#define RENC_UVD_PREENCODE_MODE_2X 0x00000002
+#define RENC_UVD_PREENCODE_MODE_4X 0x00000004
+
+#define RENC_UVD_SLICE_CONTROL_MODE_FIXED_CTBS 0x00000000
+#define RENC_UVD_SLICE_CONTROL_MODE_FIXED_BITS 0x00000001
+
+#define RENC_UVD_RATE_CONTROL_METHOD_NONE 0x00000000
+#define RENC_UVD_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR 0x00000001
+#define RENC_UVD_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR 0x00000002
+#define RENC_UVD_RATE_CONTROL_METHOD_CBR 0x00000003
+
+#define RENC_UVD_NALU_TYPE_AUD 0x00000001
+#define RENC_UVD_NALU_TYPE_VPS 0x00000002
+#define RENC_UVD_NALU_TYPE_SPS 0x00000003
+#define RENC_UVD_NALU_TYPE_PPS 0x00000004
+#define RENC_UVD_NALU_TYPE_END_OF_SEQUENCE 0x00000005
+
+#define RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS 16
+#define RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS 16
+
+#define RENC_UVD_HEADER_INSTRUCTION_END 0
+#define RENC_UVD_HEADER_INSTRUCTION_DEPENDENT_SLICE_END 1
+#define RENC_UVD_HEADER_INSTRUCTION_COPY 2
+#define RENC_UVD_HEADER_INSTRUCTION_FIRST_SLICE 3
+#define RENC_UVD_HEADER_INSTRUCTION_SLICE_SEGMENT 4
+#define RENC_UVD_HEADER_INSTRUCTION_SLICE_QP_DELTA 5
+
+#define RENC_UVD_PICTURE_TYPE_B 0
+#define RENC_UVD_PICTURE_TYPE_P 1
+#define RENC_UVD_PICTURE_TYPE_I 2
+#define RENC_UVD_PICTURE_TYPE_P_SKIP 3
+
+#define RENC_UVD_SWIZZLE_MODE_LINEAR 0
+#define RENC_UVD_SWIZZLE_MODE_256B_D 2
+#define RENC_UVD_SWIZZLE_MODE_4kB_D 6
+#define RENC_UVD_SWIZZLE_MODE_64kB_D 10
+#define RENC_UVD_INTRA_REFRESH_MODE_NONE 0
+#define RENC_UVD_INTRA_REFRESH_MODE_CTB_MB_ROWS 1
+#define RENC_UVD_INTRA_REFRESH_MODE_CTB_MB_COLUMNS 2
+
+#define RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES 34
+#define RENC_UVD_ADDR_MODE_LINEAR 0
+#define RENC_UVD_ADDR_MODE_PELE_8X8_1D 1
+#define RENC_UVD_ADDR_MODE_32AS8_88 2
+
+#define RENC_UVD_ARRAY_MODE_LINEAR 0
+#define RENC_UVD_ARRAY_MODE_PELE_8X8_1D 2
+#define RENC_UVD_ARRAY_MODE_2D_TILED_THIN1 4
+
+#define RENC_UVD_VIDEO_BITSTREAM_BUFFER_MODE_LINEAR 0
+#define RENC_UVD_VIDEO_BITSTREAM_BUFFER_MODE_CIRCULAR 1
+
+#define RENC_UVD_FEEDBACK_BUFFER_MODE_LINEAR 0
+#define RENC_UVD_FEEDBACK_BUFFER_MODE_CIRCULAR 1
+
+#define RENC_UVD_FEEDBACK_STATUS_OK 0x00000000
+#define RENC_UVD_FEEDBACK_STATUS_NOT_ENCODED 0x10000001
+
+typedef struct radeon_uvd_enc_feedback_s
+{
+ uint32_t task_id;
+ uint32_t first_in_task;
+ uint32_t last_in_task;
+ uint32_t status;
+ uint32_t has_bitstream;
+ uint32_t bitstream_offset;
+ uint32_t bitstream_size;
+ uint32_t enabled_filler_data;
+ uint32_t filler_data_size;
+ uint32_t extra_bytes;
+} radeon_uvd_enc_feedback_t;
+
+typedef struct ruvd_enc_session_info_s /* four-dword session info record */
+{
+ uint32_t reserved;
+ uint32_t interface_version; /* presumably major/minor packed with RENC_UVD_IF_*_VERSION_SHIFT - confirm against the code that fills it */
+ uint32_t sw_context_address_hi; /* presumably the upper 32 bits of one 64-bit address - confirm */
+ uint32_t sw_context_address_lo; /* presumably the lower 32 bits of the same address - confirm */
+} ruvd_enc_session_info_t;
+
+typedef struct ruvd_enc_task_info_s
+{
+ uint32_t total_size_of_all_packages;
+ uint32_t task_id;
+ uint32_t allowed_max_num_feedbacks;
+} ruvd_enc_task_info_t;
+
+typedef struct ruvd_enc_session_init_s
+{
+ uint32_t aligned_picture_width;
+ uint32_t aligned_picture_height;
+ uint32_t padding_width;
+ uint32_t padding_height;
+ uint32_t pre_encode_mode;
+ uint32_t pre_encode_chroma_enabled;
+} ruvd_enc_session_init_t;
+
+typedef struct ruvd_enc_layer_control_s
+{
+ uint32_t max_num_temporal_layers;
+ uint32_t num_temporal_layers;
+} ruvd_enc_layer_control_t;
+
+typedef struct ruvd_enc_layer_select_s
+{
+ uint32_t temporal_layer_index;
+} ruvd_enc_layer_select_t;
+
+typedef struct ruvd_enc_hevc_slice_control_s /* slice control: one mode dword followed by two mode-dependent dwords */
+{
+ uint32_t slice_control_mode; /* presumably one of RENC_UVD_SLICE_CONTROL_MODE_* - confirm */
+ union /* anonymous union: both structs below overlay the same two dwords */
+ {
+ struct
+ {
+ uint32_t num_ctbs_per_slice;
+ uint32_t num_ctbs_per_slice_segment;
+ } fixed_ctbs_per_slice; /* NOTE(review): looks like the view for MODE_FIXED_CTBS - confirm */
+
+ struct
+ {
+ uint32_t num_bits_per_slice;
+ uint32_t num_bits_per_slice_segment;
+ } fixed_bits_per_slice; /* NOTE(review): looks like the view for MODE_FIXED_BITS - confirm */
+ };
+} ruvd_enc_hevc_slice_control_t;
+
+typedef struct ruvd_enc_hevc_spec_misc_s
+{
+ uint32_t log2_min_luma_coding_block_size_minus3;
+ uint32_t amp_disabled;
+ uint32_t strong_intra_smoothing_enabled;
+ uint32_t constrained_intra_pred_flag;
+ uint32_t cabac_init_flag;
+ uint32_t half_pel_enabled;
+ uint32_t quarter_pel_enabled;
+} ruvd_enc_hevc_spec_misc_t;
+
+typedef struct ruvd_enc_rate_ctl_session_init_s
+{
+ uint32_t rate_control_method;
+ uint32_t vbv_buffer_level;
+} ruvd_enc_rate_ctl_session_init_t;
+
+typedef struct ruvd_enc_rate_ctl_layer_init_s
+{
+ uint32_t target_bit_rate;
+ uint32_t peak_bit_rate;
+ uint32_t frame_rate_num;
+ uint32_t frame_rate_den;
+ uint32_t vbv_buffer_size;
+ uint32_t avg_target_bits_per_picture;
+ uint32_t peak_bits_per_picture_integer;
+ uint32_t peak_bits_per_picture_fractional;
+} ruvd_enc_rate_ctl_layer_init_t;
+
+typedef struct ruvd_enc_rate_ctl_per_picture_s
+{
+ uint32_t qp;
+ uint32_t min_qp_app;
+ uint32_t max_qp_app;
+ uint32_t max_au_size;
+ uint32_t enabled_filler_data;
+ uint32_t skip_frame_enable;
+ uint32_t enforce_hrd;
+} ruvd_enc_rate_ctl_per_picture_t;
+
+typedef struct ruvd_enc_quality_params_s
+{
+ uint32_t vbaq_mode;
+ uint32_t scene_change_sensitivity;
+ uint32_t scene_change_min_idr_interval;
+} ruvd_enc_quality_params_t;
+
+typedef struct ruvd_enc_direct_output_nalu_s /* NALU record: type, size, then payload dwords */
+{
+ uint32_t type; /* presumably one of RENC_UVD_NALU_TYPE_* - confirm */
+ uint32_t size; /* units (bytes vs. dwords) are not visible in this header - confirm */
+ uint32_t data[1]; /* NOTE(review): one-element trailing array; looks like the pre-C99 variable-length idiom, so sizeof includes one payload dword - confirm how users size it */
+} ruvd_enc_direct_output_nalu_t;
+
+typedef struct ruvd_enc_slice_header_s /* slice header template: a fixed dword array plus a fixed instruction table */
+{
+ uint32_t
+ bitstream_template
+ [RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS]; /* 16 dwords per the #define in this header */
+ struct
+ {
+ uint32_t instruction; /* presumably one of RENC_UVD_HEADER_INSTRUCTION_* - confirm */
+ uint32_t num_bits;
+ } instructions[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS]; /* 16 entries per the #define in this header */
+} ruvd_enc_slice_header_t;
+
+typedef struct ruvd_enc_encode_params_s /* per-picture encode parameters; every slot is one dword */
+{
+ uint32_t pic_type; /* presumably one of RENC_UVD_PICTURE_TYPE_* - confirm */
+ uint32_t allowed_max_bitstream_size;
+ uint32_t input_picture_luma_address_hi; /* hi/lo pairs presumably split one 64-bit address - confirm */
+ uint32_t input_picture_luma_address_lo;
+ uint32_t input_picture_chroma_address_hi;
+ uint32_t input_picture_chroma_address_lo;
+ uint32_t input_pic_luma_pitch;
+ uint32_t input_pic_chroma_pitch;
+ union /* anonymous union: one dword with two names */
+ {
+ uint32_t input_pic_addr_mode; /* presumably RENC_UVD_ADDR_MODE_* - confirm */
+ uint32_t reserved;
+ };
+ union /* anonymous union: one dword with two names */
+ {
+ uint32_t input_pic_array_mode; /* presumably RENC_UVD_ARRAY_MODE_* - confirm */
+ uint32_t input_pic_swizzle_mode; /* presumably RENC_UVD_SWIZZLE_MODE_*; which name applies is decided outside this header - confirm */
+ };
+ uint32_t reference_picture_index;
+ uint32_t reconstructed_picture_index;
+} ruvd_enc_encode_params_t;
+
+typedef struct ruvd_enc_hevc_deblocking_filter_s /* deblocking filter settings; note the mixed signedness below */
+{
+ uint32_t loop_filter_across_slices_enabled; /* the only unsigned member */
+ int32_t deblocking_filter_disabled; /* declared signed although the name reads like a flag - NOTE(review): confirm this is intended */
+ int32_t beta_offset_div2; /* signed */
+ int32_t tc_offset_div2; /* signed */
+ int32_t cb_qp_offset; /* signed */
+ int32_t cr_qp_offset; /* signed */
+} ruvd_enc_hevc_deblocking_filter_t;
+
+typedef struct ruvd_enc_intra_refresh_s
+{
+ uint32_t intra_refresh_mode;
+ uint32_t offset;
+ uint32_t region_size;
+} ruvd_enc_intra_refresh_t;
+
+typedef struct ruvd_enc_reconstructed_picture_s
+{
+ uint32_t luma_offset;
+ uint32_t chroma_offset;
+} ruvd_enc_reconstructed_picture_t;
+
+typedef struct ruvd_enc_encode_context_buffer_s /* encode context buffer description with two fixed-size picture offset tables */
+{
+ uint32_t encode_context_address_hi; /* hi/lo presumably split one 64-bit address - confirm */
+ uint32_t encode_context_address_lo;
+ union /* anonymous union: one dword with two names */
+ {
+ uint32_t addr_mode;
+ uint32_t reserved;
+ };
+ union /* anonymous union: one dword with two names */
+ {
+ uint32_t array_mode;
+ uint32_t swizzle_mode;
+ };
+ uint32_t rec_luma_pitch;
+ uint32_t rec_chroma_pitch;
+ uint32_t num_reconstructed_pictures; /* presumably the number of valid entries in the table below - confirm */
+ ruvd_enc_reconstructed_picture_t
+ reconstructed_pictures[RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES]; /* 34 luma/chroma offset pairs per the #define in this header */
+ uint32_t pre_encode_picture_luma_pitch;
+ uint32_t pre_encode_picture_chroma_pitch;
+ ruvd_enc_reconstructed_picture_t
+ pre_encode_reconstructed_pictures
+ [RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES]; /* same capacity as reconstructed_pictures */
+ ruvd_enc_reconstructed_picture_t pre_encode_input_picture; /* a single luma/chroma offset pair */
+} ruvd_enc_encode_context_buffer_t;
+
+typedef struct ruvd_enc_video_bitstream_buffer_s
+{
+ uint32_t mode;
+ uint32_t video_bitstream_buffer_address_hi;
+ uint32_t video_bitstream_buffer_address_lo;
+ uint32_t video_bitstream_buffer_size;
+ uint32_t video_bitstream_data_offset;
+} ruvd_enc_video_bitstream_buffer_t;
+
+typedef struct ruvd_enc_feedback_buffer_s
+{
+ uint32_t mode;
+ uint32_t feedback_buffer_address_hi;
+ uint32_t feedback_buffer_address_lo;
+ uint32_t feedback_buffer_size;
+ uint32_t feedback_data_size;
+} ruvd_enc_feedback_buffer_t;
+
+typedef void (*radeon_uvd_enc_get_buffer) (struct pipe_resource * resource, /* callback type; returns nothing */
+ struct pb_buffer ** handle, /* pointer-to-pointer: presumably an out-parameter - confirm */
+ struct radeon_surf ** surface); /* pointer-to-pointer: presumably an out-parameter - confirm */
+
+struct pipe_video_codec *radeon_uvd_create_encoder(struct pipe_context /* declaration only; the definition is not in this header */
+ *context,
+ const struct
+ pipe_video_codec *templat, /* const: not modified through this pointer */
+ struct radeon_winsys *ws,
+ radeon_uvd_enc_get_buffer
+ get_buffer); /* callback of the type declared just above */
+
+struct radeon_uvd_enc_pic /* per-picture state: scalar fields, flags, then the ruvd_enc_*_t records held by value */
+{
+ enum pipe_h265_enc_picture_type picture_type; /* enum declared outside this header */
+
+ unsigned frame_num;
+ unsigned pic_order_cnt;
+ unsigned pic_order_cnt_type;
+ unsigned ref_idx_l0;
+ unsigned ref_idx_l1;
+ unsigned crop_left;
+ unsigned crop_right;
+ unsigned crop_top;
+ unsigned crop_bottom;
+ unsigned general_tier_flag; /* NOTE(review): this and the fields below look like HEVC parameter-set syntax element names - confirm */
+ unsigned general_profile_idc;
+ unsigned general_level_idc;
+ unsigned max_poc;
+ unsigned log2_max_poc;
+ unsigned chroma_format_idc;
+ unsigned pic_width_in_luma_samples;
+ unsigned pic_height_in_luma_samples;
+ unsigned log2_diff_max_min_luma_coding_block_size;
+ unsigned log2_min_transform_block_size_minus2;
+ unsigned log2_diff_max_min_transform_block_size;
+ unsigned max_transform_hierarchy_depth_inter;
+ unsigned max_transform_hierarchy_depth_intra;
+ unsigned log2_parallel_merge_level_minus2;
+ unsigned bit_depth_luma_minus8;
+ unsigned bit_depth_chroma_minus8;
+ unsigned nal_unit_type;
+ unsigned max_num_merge_cand;
+
+ bool not_referenced;
+ bool is_iframe;
+ bool is_even_frame;
+ bool sample_adaptive_offset_enabled_flag;
+ bool pcm_enabled_flag;
+ bool sps_temporal_mvp_enabled_flag;
+
+ ruvd_enc_task_info_t task_info; /* from here on: the record types typedef'd earlier in this header, embedded by value */
+ ruvd_enc_session_init_t session_init;
+ ruvd_enc_layer_control_t layer_ctrl;
+ ruvd_enc_layer_select_t layer_sel;
+ ruvd_enc_hevc_slice_control_t hevc_slice_ctrl;
+ ruvd_enc_hevc_spec_misc_t hevc_spec_misc;
+ ruvd_enc_rate_ctl_session_init_t rc_session_init;
+ ruvd_enc_rate_ctl_layer_init_t rc_layer_init;
+ ruvd_enc_hevc_deblocking_filter_t hevc_deblock;
+ ruvd_enc_rate_ctl_per_picture_t rc_per_pic;
+ ruvd_enc_quality_params_t quality_params;
+ ruvd_enc_encode_context_buffer_t ctx_buf;
+ ruvd_enc_video_bitstream_buffer_t bit_buf;
+ ruvd_enc_feedback_buffer_t fb_buf;
+ ruvd_enc_intra_refresh_t intra_ref;
+ ruvd_enc_encode_params_t enc_params;
+};
+
+struct radeon_uvd_encoder /* encoder object: codec base, three hooks, winsys handles, buffers and bit-writer fields */
+{
+ struct pipe_video_codec base; /* first member, by value; radeon_uvd_create_encoder() returns this type by pointer */
+
+ void (*begin) (struct radeon_uvd_encoder * enc, /* hook; receives the encoder and a picture description */
+ struct pipe_picture_desc * pic);
+ void (*encode) (struct radeon_uvd_encoder * enc); /* hook; receives only the encoder */
+ void (*destroy) (struct radeon_uvd_encoder * enc); /* hook; NOTE(review): all three are presumably installed by radeon_uvd_enc_1_1_init() - confirm */
+
+ unsigned stream_handle;
+
+ struct pipe_screen *screen;
+ struct radeon_winsys *ws;
+ struct radeon_winsys_cs *cs;
+
+ radeon_uvd_enc_get_buffer get_buffer; /* callback type typedef'd earlier in this header */
+
+ struct pb_buffer *handle;
+ struct radeon_surf *luma;
+ struct radeon_surf *chroma;
+
+ struct pb_buffer *bs_handle;
+ unsigned bs_size;
+
+ unsigned cpb_num;
+
+ struct rvid_buffer *si; /* held by pointer */
+ struct rvid_buffer *fb; /* held by pointer */
+ struct rvid_buffer cpb; /* held by value, unlike si and fb, so the full rvid_buffer definition must be visible before this header */
+ struct radeon_uvd_enc_pic enc_pic; /* per-picture state, embedded by value */
+
+ unsigned shifter; /* NOTE(review): shifter..bits_output look like bit-packing state for header writing - confirm */
+ unsigned bits_in_shifter;
+ unsigned num_zeros;
+ unsigned byte_index;
+ unsigned bits_output;
+ uint32_t total_task_size;
+ uint32_t *p_task_size; /* pointer to a dword; what it points at is not visible in this header */
+
+ bool emulation_prevention;
+ bool need_feedback;
+};
+
+void radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc); /* declaration only; NOTE(review): the name matches RENC_UVD_FW_INTERFACE_MAJOR/MINOR_VERSION 1/1 - presumably sets up enc for that interface, confirm */
+bool si_radeon_uvd_enc_supported(struct si_screen *rscreen); /* declaration only; presumably reports whether UVD encode is usable on this screen - confirm */
+
+#endif // _RADEON_UVD_ENC_H
--
2.7.4
James Zhu
2018-02-12 15:14:13 UTC
Permalink
Add hevc encode hardware interface for UVD

Signed-off-by: James Zhu <***@amd.com>
---
src/gallium/drivers/radeon/radeon_uvd_enc.h | 469 ++++++++++++++++++++++++++++
1 file changed, 469 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.h

diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.h b/src/gallium/drivers/radeon/radeon_uvd_enc.h
new file mode 100644
index 0000000..20c340d
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc.h
@@ -0,0 +1,469 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef _RADEON_UVD_ENC_H
+#define _RADEON_UVD_ENC_H
+
+#define RENC_UVD_FW_INTERFACE_MAJOR_VERSION 1
+#define RENC_UVD_FW_INTERFACE_MINOR_VERSION 1
+
+#define RENC_UVD_IB_PARAM_SESSION_INFO 0x00000001
+#define RENC_UVD_IB_PARAM_TASK_INFO 0x00000002
+#define RENC_UVD_IB_PARAM_SESSION_INIT 0x00000003
+#define RENC_UVD_IB_PARAM_LAYER_CONTROL 0x00000004
+#define RENC_UVD_IB_PARAM_LAYER_SELECT 0x00000005
+#define RENC_UVD_IB_PARAM_SLICE_CONTROL 0x00000006
+#define RENC_UVD_IB_PARAM_SPEC_MISC 0x00000007
+#define RENC_UVD_IB_PARAM_RATE_CONTROL_SESSION_INIT 0x00000008
+#define RENC_UVD_IB_PARAM_RATE_CONTROL_LAYER_INIT 0x00000009
+#define RENC_UVD_IB_PARAM_RATE_CONTROL_PER_PICTURE 0x0000000a
+#define RENC_UVD_IB_PARAM_SLICE_HEADER 0x0000000b
+#define RENC_UVD_IB_PARAM_ENCODE_PARAMS 0x0000000c
+#define RENC_UVD_IB_PARAM_QUALITY_PARAMS 0x0000000d
+#define RENC_UVD_IB_PARAM_DEBLOCKING_FILTER 0x0000000e
+#define RENC_UVD_IB_PARAM_INTRA_REFRESH 0x0000000f
+#define RENC_UVD_IB_PARAM_ENCODE_CONTEXT_BUFFER 0x00000010
+#define RENC_UVD_IB_PARAM_VIDEO_BITSTREAM_BUFFER 0x00000011
+#define RENC_UVD_IB_PARAM_FEEDBACK_BUFFER 0x00000012
+#define RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER 0x00000013
+#define RENC_UVD_IB_PARAM_FEEDBACK_BUFFER_ADDITIONAL 0x00000014
+
+#define RENC_UVD_IB_OP_INITIALIZE 0x08000001
+#define RENC_UVD_IB_OP_CLOSE_SESSION 0x08000002
+#define RENC_UVD_IB_OP_ENCODE 0x08000003
+#define RENC_UVD_IB_OP_INIT_RC 0x08000004
+#define RENC_UVD_IB_OP_INIT_RC_VBV_BUFFER_LEVEL 0x08000005
+#define RENC_UVD_IB_OP_SET_SPEED_ENCODING_MODE 0x08000006
+#define RENC_UVD_IB_OP_SET_BALANCE_ENCODING_MODE 0x08000007
+#define RENC_UVD_IB_OP_SET_QUALITY_ENCODING_MODE 0x08000008
+
+#define RENC_UVD_IF_MAJOR_VERSION_MASK 0xFFFF0000
+#define RENC_UVD_IF_MAJOR_VERSION_SHIFT 16
+#define RENC_UVD_IF_MINOR_VERSION_MASK 0x0000FFFF
+#define RENC_UVD_IF_MINOR_VERSION_SHIFT 0
+
+#define RENC_UVD_PREENCODE_MODE_NONE 0x00000000
+#define RENC_UVD_PREENCODE_MODE_1X 0x00000001
+#define RENC_UVD_PREENCODE_MODE_2X 0x00000002
+#define RENC_UVD_PREENCODE_MODE_4X 0x00000004
+
+#define RENC_UVD_SLICE_CONTROL_MODE_FIXED_CTBS 0x00000000
+#define RENC_UVD_SLICE_CONTROL_MODE_FIXED_BITS 0x00000001
+
+#define RENC_UVD_RATE_CONTROL_METHOD_NONE 0x00000000
+#define RENC_UVD_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR 0x00000001
+#define RENC_UVD_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR 0x00000002
+#define RENC_UVD_RATE_CONTROL_METHOD_CBR 0x00000003
+
+#define RENC_UVD_NALU_TYPE_AUD 0x00000001
+#define RENC_UVD_NALU_TYPE_VPS 0x00000002
+#define RENC_UVD_NALU_TYPE_SPS 0x00000003
+#define RENC_UVD_NALU_TYPE_PPS 0x00000004
+#define RENC_UVD_NALU_TYPE_END_OF_SEQUENCE 0x00000005
+
+#define RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS 16
+#define RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS 16
+
+#define RENC_UVD_HEADER_INSTRUCTION_END 0
+#define RENC_UVD_HEADER_INSTRUCTION_DEPENDENT_SLICE_END 1
+#define RENC_UVD_HEADER_INSTRUCTION_COPY 2
+#define RENC_UVD_HEADER_INSTRUCTION_FIRST_SLICE 3
+#define RENC_UVD_HEADER_INSTRUCTION_SLICE_SEGMENT 4
+#define RENC_UVD_HEADER_INSTRUCTION_SLICE_QP_DELTA 5
+
+#define RENC_UVD_PICTURE_TYPE_B 0
+#define RENC_UVD_PICTURE_TYPE_P 1
+#define RENC_UVD_PICTURE_TYPE_I 2
+#define RENC_UVD_PICTURE_TYPE_P_SKIP 3
+
+#define RENC_UVD_SWIZZLE_MODE_LINEAR 0
+#define RENC_UVD_SWIZZLE_MODE_256B_D 2
+#define RENC_UVD_SWIZZLE_MODE_4kB_D 6
+#define RENC_UVD_SWIZZLE_MODE_64kB_D 10
+#define RENC_UVD_INTRA_REFRESH_MODE_NONE 0
+#define RENC_UVD_INTRA_REFRESH_MODE_CTB_MB_ROWS 1
+#define RENC_UVD_INTRA_REFRESH_MODE_CTB_MB_COLUMNS 2
+
+#define RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES 34
+#define RENC_UVD_ADDR_MODE_LINEAR 0
+#define RENC_UVD_ADDR_MODE_PELE_8X8_1D 1
+#define RENC_UVD_ADDR_MODE_32AS8_88 2
+
+#define RENC_UVD_ARRAY_MODE_LINEAR 0
+#define RENC_UVD_ARRAY_MODE_PELE_8X8_1D 2
+#define RENC_UVD_ARRAY_MODE_2D_TILED_THIN1 4
+
+#define RENC_UVD_VIDEO_BITSTREAM_BUFFER_MODE_LINEAR 0
+#define RENC_UVD_VIDEO_BITSTREAM_BUFFER_MODE_CIRCULAR 1
+
+#define RENC_UVD_FEEDBACK_BUFFER_MODE_LINEAR 0
+#define RENC_UVD_FEEDBACK_BUFFER_MODE_CIRCULAR 1
+
+#define RENC_UVD_FEEDBACK_STATUS_OK 0x00000000
+#define RENC_UVD_FEEDBACK_STATUS_NOT_ENCODED 0x10000001
+
+typedef struct radeon_uvd_enc_feedback_s
+{
+ uint32_t task_id;
+ uint32_t first_in_task;
+ uint32_t last_in_task;
+ uint32_t status;
+ uint32_t has_bitstream;
+ uint32_t bitstream_offset;
+ uint32_t bitstream_size;
+ uint32_t enabled_filler_data;
+ uint32_t filler_data_size;
+ uint32_t extra_bytes;
+} radeon_uvd_enc_feedback_t;
+
+typedef struct ruvd_enc_session_info_s
+{
+ uint32_t reserved;
+ uint32_t interface_version;
+ uint32_t sw_context_address_hi;
+ uint32_t sw_context_address_lo;
+} ruvd_enc_session_info_t;
+
+typedef struct ruvd_enc_task_info_s
+{
+ uint32_t total_size_of_all_packages;
+ uint32_t task_id;
+ uint32_t allowed_max_num_feedbacks;
+} ruvd_enc_task_info_t;
+
+typedef struct ruvd_enc_session_init_s
+{
+ uint32_t aligned_picture_width;
+ uint32_t aligned_picture_height;
+ uint32_t padding_width;
+ uint32_t padding_height;
+ uint32_t pre_encode_mode;
+ uint32_t pre_encode_chroma_enabled;
+} ruvd_enc_session_init_t;
+
+typedef struct ruvd_enc_layer_control_s
+{
+ uint32_t max_num_temporal_layers;
+ uint32_t num_temporal_layers;
+} ruvd_enc_layer_control_t;
+
+typedef struct ruvd_enc_layer_select_s
+{
+ uint32_t temporal_layer_index;
+} ruvd_enc_layer_select_t;
+
+typedef struct ruvd_enc_hevc_slice_control_s
+{
+ uint32_t slice_control_mode;
+ union
+ {
+ struct
+ {
+ uint32_t num_ctbs_per_slice;
+ uint32_t num_ctbs_per_slice_segment;
+ } fixed_ctbs_per_slice;
+
+ struct
+ {
+ uint32_t num_bits_per_slice;
+ uint32_t num_bits_per_slice_segment;
+ } fixed_bits_per_slice;
+ };
+} ruvd_enc_hevc_slice_control_t;
+
+typedef struct ruvd_enc_hevc_spec_misc_s
+{
+ uint32_t log2_min_luma_coding_block_size_minus3;
+ uint32_t amp_disabled;
+ uint32_t strong_intra_smoothing_enabled;
+ uint32_t constrained_intra_pred_flag;
+ uint32_t cabac_init_flag;
+ uint32_t half_pel_enabled;
+ uint32_t quarter_pel_enabled;
+} ruvd_enc_hevc_spec_misc_t;
+
+typedef struct ruvd_enc_rate_ctl_session_init_s
+{
+ uint32_t rate_control_method;
+ uint32_t vbv_buffer_level;
+} ruvd_enc_rate_ctl_session_init_t;
+
+typedef struct ruvd_enc_rate_ctl_layer_init_s
+{
+ uint32_t target_bit_rate;
+ uint32_t peak_bit_rate;
+ uint32_t frame_rate_num;
+ uint32_t frame_rate_den;
+ uint32_t vbv_buffer_size;
+ uint32_t avg_target_bits_per_picture;
+ uint32_t peak_bits_per_picture_integer;
+ uint32_t peak_bits_per_picture_fractional;
+} ruvd_enc_rate_ctl_layer_init_t;
+
+typedef struct ruvd_enc_rate_ctl_per_picture_s
+{
+ uint32_t qp;
+ uint32_t min_qp_app;
+ uint32_t max_qp_app;
+ uint32_t max_au_size;
+ uint32_t enabled_filler_data;
+ uint32_t skip_frame_enable;
+ uint32_t enforce_hrd;
+} ruvd_enc_rate_ctl_per_picture_t;
+
+typedef struct ruvd_enc_quality_params_s
+{
+ uint32_t vbaq_mode;
+ uint32_t scene_change_sensitivity;
+ uint32_t scene_change_min_idr_interval;
+} ruvd_enc_quality_params_t;
+
+typedef struct ruvd_enc_direct_output_nalu_s
+{
+ uint32_t type;
+ uint32_t size;
+ uint32_t data[1];
+} ruvd_enc_direct_output_nalu_t;
+
+typedef struct ruvd_enc_slice_header_s
+{
+ uint32_t
+ bitstream_template
+ [RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS];
+ struct
+ {
+ uint32_t instruction;
+ uint32_t num_bits;
+ } instructions[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS];
+} ruvd_enc_slice_header_t;
+
+typedef struct ruvd_enc_encode_params_s
+{
+ uint32_t pic_type;
+ uint32_t allowed_max_bitstream_size;
+ uint32_t input_picture_luma_address_hi;
+ uint32_t input_picture_luma_address_lo;
+ uint32_t input_picture_chroma_address_hi;
+ uint32_t input_picture_chroma_address_lo;
+ uint32_t input_pic_luma_pitch;
+ uint32_t input_pic_chroma_pitch;
+ union
+ {
+ uint32_t input_pic_addr_mode;
+ uint32_t reserved;
+ };
+ union
+ {
+ uint32_t input_pic_array_mode;
+ uint32_t input_pic_swizzle_mode;
+ };
+ uint32_t reference_picture_index;
+ uint32_t reconstructed_picture_index;
+} ruvd_enc_encode_params_t;
+
+typedef struct ruvd_enc_hevc_deblocking_filter_s
+{
+ uint32_t loop_filter_across_slices_enabled;
+ int32_t deblocking_filter_disabled;
+ int32_t beta_offset_div2;
+ int32_t tc_offset_div2;
+ int32_t cb_qp_offset;
+ int32_t cr_qp_offset;
+} ruvd_enc_hevc_deblocking_filter_t;
+
+typedef struct ruvd_enc_intra_refresh_s
+{
+ uint32_t intra_refresh_mode;
+ uint32_t offset;
+ uint32_t region_size;
+} ruvd_enc_intra_refresh_t;
+
+typedef struct ruvd_enc_reconstructed_picture_s
+{
+ uint32_t luma_offset;
+ uint32_t chroma_offset;
+} ruvd_enc_reconstructed_picture_t;
+
+typedef struct ruvd_enc_encode_context_buffer_s
+{
+ uint32_t encode_context_address_hi;
+ uint32_t encode_context_address_lo;
+ union
+ {
+ uint32_t addr_mode;
+ uint32_t reserved;
+ };
+ union
+ {
+ uint32_t array_mode;
+ uint32_t swizzle_mode;
+ };
+ uint32_t rec_luma_pitch;
+ uint32_t rec_chroma_pitch;
+ uint32_t num_reconstructed_pictures;
+ ruvd_enc_reconstructed_picture_t
+ reconstructed_pictures[RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES];
+ uint32_t pre_encode_picture_luma_pitch;
+ uint32_t pre_encode_picture_chroma_pitch;
+ ruvd_enc_reconstructed_picture_t
+ pre_encode_reconstructed_pictures
+ [RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES];
+ ruvd_enc_reconstructed_picture_t pre_encode_input_picture;
+} ruvd_enc_encode_context_buffer_t;
+
+typedef struct ruvd_enc_video_bitstream_buffer_s
+{
+ uint32_t mode;
+ uint32_t video_bitstream_buffer_address_hi;
+ uint32_t video_bitstream_buffer_address_lo;
+ uint32_t video_bitstream_buffer_size;
+ uint32_t video_bitstream_data_offset;
+} ruvd_enc_video_bitstream_buffer_t;
+
+typedef struct ruvd_enc_feedback_buffer_s
+{
+ uint32_t mode;
+ uint32_t feedback_buffer_address_hi;
+ uint32_t feedback_buffer_address_lo;
+ uint32_t feedback_buffer_size;
+ uint32_t feedback_data_size;
+} ruvd_enc_feedback_buffer_t;
+
+typedef void (*radeon_uvd_enc_get_buffer) (struct pipe_resource * resource,
+ struct pb_buffer ** handle,
+ struct radeon_surf ** surface);
+
+struct pipe_video_codec *radeon_uvd_create_encoder(struct pipe_context
+ *context,
+ const struct
+ pipe_video_codec *templat,
+ struct radeon_winsys *ws,
+ radeon_uvd_enc_get_buffer
+ get_buffer);
+
+struct radeon_uvd_enc_pic
+{
+ enum pipe_h265_enc_picture_type picture_type;
+
+ unsigned frame_num;
+ unsigned pic_order_cnt;
+ unsigned pic_order_cnt_type;
+ unsigned crop_left;
+ unsigned crop_right;
+ unsigned crop_top;
+ unsigned crop_bottom;
+ unsigned general_tier_flag;
+ unsigned general_profile_idc;
+ unsigned general_level_idc;
+ unsigned max_poc;
+ unsigned log2_max_poc;
+ unsigned chroma_format_idc;
+ unsigned pic_width_in_luma_samples;
+ unsigned pic_height_in_luma_samples;
+ unsigned log2_diff_max_min_luma_coding_block_size;
+ unsigned log2_min_transform_block_size_minus2;
+ unsigned log2_diff_max_min_transform_block_size;
+ unsigned max_transform_hierarchy_depth_inter;
+ unsigned max_transform_hierarchy_depth_intra;
+ unsigned log2_parallel_merge_level_minus2;
+ unsigned bit_depth_luma_minus8;
+ unsigned bit_depth_chroma_minus8;
+ unsigned nal_unit_type;
+ unsigned max_num_merge_cand;
+
+ bool not_referenced;
+ bool is_iframe;
+ bool is_even_frame;
+ bool sample_adaptive_offset_enabled_flag;
+ bool pcm_enabled_flag;
+ bool sps_temporal_mvp_enabled_flag;
+
+ ruvd_enc_task_info_t task_info;
+ ruvd_enc_session_init_t session_init;
+ ruvd_enc_layer_control_t layer_ctrl;
+ ruvd_enc_layer_select_t layer_sel;
+ ruvd_enc_hevc_slice_control_t hevc_slice_ctrl;
+ ruvd_enc_hevc_spec_misc_t hevc_spec_misc;
+ ruvd_enc_rate_ctl_session_init_t rc_session_init;
+ ruvd_enc_rate_ctl_layer_init_t rc_layer_init;
+ ruvd_enc_hevc_deblocking_filter_t hevc_deblock;
+ ruvd_enc_rate_ctl_per_picture_t rc_per_pic;
+ ruvd_enc_quality_params_t quality_params;
+ ruvd_enc_encode_context_buffer_t ctx_buf;
+ ruvd_enc_video_bitstream_buffer_t bit_buf;
+ ruvd_enc_feedback_buffer_t fb_buf;
+ ruvd_enc_intra_refresh_t intra_ref;
+ ruvd_enc_encode_params_t enc_params;
+};
+
+struct radeon_uvd_encoder
+{
+ struct pipe_video_codec base;
+
+ void (*begin) (struct radeon_uvd_encoder * enc,
+ struct pipe_picture_desc * pic);
+ void (*encode) (struct radeon_uvd_encoder * enc);
+ void (*destroy) (struct radeon_uvd_encoder * enc);
+
+ unsigned stream_handle;
+
+ struct pipe_screen *screen;
+ struct radeon_winsys *ws;
+ struct radeon_winsys_cs *cs;
+
+ radeon_uvd_enc_get_buffer get_buffer;
+
+ struct pb_buffer *handle;
+ struct radeon_surf *luma;
+ struct radeon_surf *chroma;
+
+ struct pb_buffer *bs_handle;
+ unsigned bs_size;
+
+ unsigned cpb_num;
+
+ struct rvid_buffer *si;
+ struct rvid_buffer *fb;
+ struct rvid_buffer cpb;
+ struct radeon_uvd_enc_pic enc_pic;
+
+ unsigned shifter;
+ unsigned bits_in_shifter;
+ unsigned num_zeros;
+ unsigned byte_index;
+ unsigned bits_output;
+ uint32_t total_task_size;
+ uint32_t *p_task_size;
+
+ bool emulation_prevention;
+ bool need_feedback;
+};
+
+void radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc);
+bool si_radeon_uvd_enc_supported(struct si_screen *rscreen);
+
+#endif // _RADEON_UVD_ENC_H
--
2.7.4
James Zhu
2018-02-06 20:05:44 UTC
Permalink
Add UVD hevc encode pipe video codec creation entry

Signed-off-by: James Zhu <***@amd.com>
---
src/gallium/drivers/radeonsi/si_uvd.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_uvd.c b/src/gallium/drivers/radeonsi/si_uvd.c
index 64f2f8e..3906bbd 100644
--- a/src/gallium/drivers/radeonsi/si_uvd.c
+++ b/src/gallium/drivers/radeonsi/si_uvd.c
@@ -31,6 +31,8 @@
#include "radeon/radeon_vce.h"
#include "radeon/radeon_vcn_dec.h"
#include "radeon/radeon_vcn_enc.h"
+#include "radeon/radeon_uvd_enc.h"
+#include "util/u_video.h"

/**
* creates an video buffer with an UVD compatible memory layout
@@ -146,9 +148,16 @@ struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context,
struct si_context *ctx = (struct si_context *)context;
bool vcn = (ctx->b.family == CHIP_RAVEN) ? true : false;

- if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE)
- return (vcn) ? radeon_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer) :
- si_vce_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
+ if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
+    if (vcn) { /* the result must be returned, otherwise a decoder is created below */
+       return radeon_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
+    } else {
+       if (u_reduce_video_profile(templ->profile) == PIPE_VIDEO_FORMAT_HEVC)
+          return radeon_uvd_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
+       else
+          return si_vce_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
+    }
+ }

return (vcn) ? radeon_create_decoder(context, templ) :
si_common_uvd_create_decoder(context, templ, si_uvd_set_dtb);
--
2.7.4
Boyuan Zhang
2018-02-07 22:34:02 UTC
Permalink
Post by James Zhu
Add UVD hevc encode pipe video codec creation entry
---
src/gallium/drivers/radeonsi/si_uvd.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_uvd.c b/src/gallium/drivers/radeonsi/si_uvd.c
index 64f2f8e..3906bbd 100644
--- a/src/gallium/drivers/radeonsi/si_uvd.c
+++ b/src/gallium/drivers/radeonsi/si_uvd.c
@@ -31,6 +31,8 @@
#include "radeon/radeon_vce.h"
#include "radeon/radeon_vcn_dec.h"
#include "radeon/radeon_vcn_enc.h"
+#include "radeon/radeon_uvd_enc.h"
+#include "util/u_video.h"
/**
* creates an video buffer with an UVD compatible memory layout
@@ -146,9 +148,16 @@ struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context,
struct si_context *ctx = (struct si_context *)context;
bool vcn = (ctx->b.family == CHIP_RAVEN) ? true : false;
- if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE)
- si_vce_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
+ if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
+ if (vcn) {
+ radeon_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
+ } else {
+ if (u_reduce_video_profile(templ->profile) == PIPE_VIDEO_FORMAT_HEVC)
+ return radeon_uvd_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
+ else
+ return si_vce_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
+ }
+ }
si_common_uvd_create_decoder(context, templ, si_uvd_set_dtb);
James Zhu
2018-02-06 20:05:41 UTC
Permalink
Implement required IBs for UVD HEVC encode.

Signed-off-by: James Zhu <***@amd.com>
---
src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 1115 +++++++++++++++++++++++
1 file changed, 1115 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c

diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
new file mode 100644
index 0000000..17a39c2
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
@@ -0,0 +1,1115 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+#include "radeonsi/si_pipe.h"
+#include "radeon_video.h"
+#include "radeon_uvd_enc.h"
+
+#define RADEON_ENC_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value))
+#define RADEON_ENC_BEGIN(cmd) { \
+ uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
+RADEON_ENC_CS(cmd)
+#define RADEON_ENC_READ(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
+#define RADEON_ENC_WRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
+#define RADEON_ENC_READWRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
+#define RADEON_ENC_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; \
+ enc->total_task_size += *begin;}
+
+static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
+static const unsigned index_to_shifts[4] = { 24, 16, 8, 0 };
+
+ /* Adds buf to the command stream's buffer list and emits its GPU virtual
+  * address, displaced by offset, as two dwords (upper 32 bits first). */
+ static void
+ radeon_uvd_enc_add_buffer(struct radeon_uvd_encoder *enc,
+                           struct pb_buffer *buf, enum radeon_bo_usage usage,
+                           enum radeon_bo_domain domain, signed offset)
+ {
+    struct radeon_winsys *winsys = enc->ws;
+    uint64_t va;
+
+    winsys->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
+                          domain, RADEON_PRIO_VCE);
+
+    va = winsys->buffer_get_virtual_address(buf);
+    va += offset;
+    RADEON_ENC_CS(va >> 32);
+    RADEON_ENC_CS(va);
+ }
+
+ /* Switches emulation prevention on or off. The run-of-zero-bytes counter
+  * is cleared only when the setting actually changes. */
+ static void
+ radeon_uvd_enc_set_emulation_prevention(struct radeon_uvd_encoder *enc,
+                                         bool set)
+ {
+    if (enc->emulation_prevention == set)
+       return;
+
+    enc->num_zeros = 0;
+    enc->emulation_prevention = set;
+ }
+
+ /* Stores one byte into the current command-stream dword. Bytes fill the
+  * dword from its most significant byte downwards (see index_to_shifts);
+  * after the fourth byte the write position moves on to the next dword. */
+ static void
+ radeon_uvd_enc_output_one_byte(struct radeon_uvd_encoder *enc,
+                                unsigned char byte)
+ {
+    uint32_t *dword = &enc->cs->current.buf[enc->cs->current.cdw];
+
+    /* The first byte of a dword starts from a clean slate. */
+    if (enc->byte_index == 0)
+       *dword = 0;
+
+    *dword |= (unsigned int) byte << index_to_shifts[enc->byte_index];
+
+    if (++enc->byte_index >= 4) {
+       enc->byte_index = 0;
+       enc->cs->current.cdw++;
+    }
+ }
+
+ /* Called with each byte about to be written. While emulation prevention
+  * is enabled, a 0x03 escape byte is written first whenever two or more
+  * zero bytes are followed by 0x00, 0x01 or 0x03; the zero-byte run
+  * counter is then updated for the incoming byte. */
+ static void
+ radeon_uvd_enc_emulation_prevention(struct radeon_uvd_encoder *enc,
+                                     unsigned char byte)
+ {
+    bool needs_escape;
+
+    if (!enc->emulation_prevention)
+       return;
+
+    needs_escape = enc->num_zeros >= 2 &&
+                   (byte == 0x00 || byte == 0x01 || byte == 0x03);
+    if (needs_escape) {
+       radeon_uvd_enc_output_one_byte(enc, 0x03);
+       enc->bits_output += 8;
+       enc->num_zeros = 0;
+    }
+
+    if (byte == 0)
+       enc->num_zeros++;
+    else
+       enc->num_zeros = 0;
+ }
+
+ /* Appends the low num_bits bits of value to the bitstream, most
+  * significant bit first. Bits accumulate at the top of the 32-bit shifter;
+  * every complete byte is passed through emulation prevention and written
+  * out. num_bits is expected to be at most 32 (the mask shift below relies
+  * on it). */
+ static void
+ radeon_uvd_enc_code_fixed_bits(struct radeon_uvd_encoder *enc,
+                                unsigned int value, unsigned int num_bits)
+ {
+    while (num_bits > 0) {
+       const unsigned int room = 32 - enc->bits_in_shifter;
+       const unsigned int take = num_bits > room ? room : num_bits;
+       unsigned int chunk = value & (0xffffffff >> (32 - num_bits));
+
+       /* Not everything fits: keep only the topmost 'take' bits for now. */
+       if (take < num_bits)
+          chunk >>= num_bits - take;
+
+       enc->shifter |= chunk << (room - take);
+       enc->bits_in_shifter += take;
+       num_bits -= take;
+
+       for (; enc->bits_in_shifter >= 8; enc->bits_in_shifter -= 8) {
+          const unsigned char top = (unsigned char) (enc->shifter >> 24);
+
+          enc->shifter <<= 8;
+          radeon_uvd_enc_emulation_prevention(enc, top);
+          radeon_uvd_enc_output_one_byte(enc, top);
+          enc->bits_output += 8;
+       }
+    }
+ }
+
+ /* Returns the bit writer to its initial state: empty shifter, zeroed
+  * counters, emulation prevention disabled. */
+ static void
+ radeon_uvd_enc_reset(struct radeon_uvd_encoder *enc)
+ {
+    enc->shifter = 0;
+    enc->bits_in_shifter = 0;
+    enc->byte_index = 0;
+    enc->num_zeros = 0;
+    enc->bits_output = 0;
+    enc->emulation_prevention = false;
+ }
+
+ /* Pads the bitstream with zero bits up to the next byte boundary; does
+  * nothing when the shifter already holds a whole number of bytes. */
+ static void
+ radeon_uvd_enc_byte_align(struct radeon_uvd_encoder *enc)
+ {
+    const unsigned int pad_bits = (32 - enc->bits_in_shifter) % 8;
+
+    if (pad_bits == 0)
+       return;
+
+    radeon_uvd_enc_code_fixed_bits(enc, 0, pad_bits);
+ }
+
+ /* Writes out whatever partial byte is still held in the shifter, clears
+  * the shifter state, and then steps past a partially filled
+  * command-stream dword so the next write starts on a fresh dword. */
+ static void
+ radeon_uvd_enc_flush_headers(struct radeon_uvd_encoder *enc)
+ {
+    if (enc->bits_in_shifter != 0) {
+       const unsigned char last = (unsigned char) (enc->shifter >> 24);
+
+       radeon_uvd_enc_emulation_prevention(enc, last);
+       radeon_uvd_enc_output_one_byte(enc, last);
+
+       /* Only the bits that were really pending count as output. */
+       enc->bits_output += enc->bits_in_shifter;
+       enc->bits_in_shifter = 0;
+       enc->shifter = 0;
+       enc->num_zeros = 0;
+    }
+
+    if (enc->byte_index > 0) {
+       enc->byte_index = 0;
+       enc->cs->current.cdw++;
+    }
+ }
+
+ /**
+  * Writes value as an unsigned Exp-Golomb code: floor(log2(value + 1)) zero
+  * bits followed by value + 1 in floor(log2(value + 1)) + 1 bits.
+  *
+  * The prefix and the code word are written with two separate calls so that
+  * neither ever exceeds 32 bits; a single combined write needed more than
+  * 32 bits once value + 1 reached 65536. The bit counter is unsigned, so a
+  * value of UINT_MAX (where value + 1 wraps to 0) no longer left-shifts a
+  * negative number; such input is still out of range and only produces a
+  * single zero bit.
+  */
+ static void
+ radeon_uvd_enc_code_ue(struct radeon_uvd_encoder *enc, unsigned int value)
+ {
+    const unsigned int ue_code = value + 1;
+    unsigned int prefix_zeros = 0;
+
+    /* prefix_zeros = index of the highest set bit of ue_code. */
+    for (unsigned int rest = ue_code >> 1; rest != 0; rest >>= 1)
+       prefix_zeros++;
+
+    radeon_uvd_enc_code_fixed_bits(enc, 0, prefix_zeros);
+    radeon_uvd_enc_code_fixed_bits(enc, ue_code, prefix_zeros + 1);
+ }
+
+ /* Writes a signed value by mapping it onto an unsigned code number
+  * (positive v -> 2v - 1, negative v -> -2v, zero -> 0) and handing that
+  * to radeon_uvd_enc_code_ue(). */
+ static void
+ radeon_uvd_enc_code_se(struct radeon_uvd_encoder *enc, int value)
+ {
+    unsigned int mapped;
+
+    if (value > 0)
+       mapped = ((unsigned int) (value) << 1) - 1;
+    else if (value < 0)
+       mapped = (unsigned int) (0 - value) << 1;
+    else
+       mapped = 0;
+
+    radeon_uvd_enc_code_ue(enc, mapped);
+ }
+
+ /* Emits the SESSION_INFO package: a reserved dword, the packed firmware
+  * interface version, and the address of the session-info buffer. */
+ static void
+ radeon_uvd_enc_session_info(struct radeon_uvd_encoder *enc)
+ {
+    const unsigned int major_bits =
+       RENC_UVD_FW_INTERFACE_MAJOR_VERSION << RENC_UVD_IF_MAJOR_VERSION_SHIFT;
+    const unsigned int minor_bits =
+       RENC_UVD_FW_INTERFACE_MINOR_VERSION << RENC_UVD_IF_MINOR_VERSION_SHIFT;
+
+    RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SESSION_INFO);
+    RADEON_ENC_CS(0x00000000);   /* reserved */
+    RADEON_ENC_CS(major_bits | minor_bits);
+    RADEON_ENC_READWRITE(enc->si->res->buf, enc->si->res->domains, 0x0);
+    RADEON_ENC_END();
+ }
+
+ /* Emits the TASK_INFO package. The task id is incremented on every call
+  * and at most one feedback is allowed when need_feedback is set. */
+ static void
+ radeon_uvd_enc_task_info(struct radeon_uvd_encoder *enc, bool need_feedback)
+ {
+    ruvd_enc_task_info_t *task = &enc->enc_pic.task_info;
+
+    task->task_id++;
+    task->allowed_max_num_feedbacks = need_feedback ? 1 : 0;
+
+    RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_TASK_INFO);
+    /* Skip one dword without writing it and remember where it lives. */
+    enc->p_task_size = &enc->cs->current.buf[enc->cs->current.cdw++];
+    RADEON_ENC_CS(task->task_id);
+    RADEON_ENC_CS(task->allowed_max_num_feedbacks);
+    RADEON_ENC_END();
+ }
+
+ /* Fills in and emits the SESSION_INIT package: picture size aligned to
+  * 64 (width) and 16 (height), the resulting padding, and pre-encode
+  * disabled. */
+ static void
+ radeon_uvd_enc_session_init_hevc(struct radeon_uvd_encoder *enc)
+ {
+    ruvd_enc_session_init_t *init = &enc->enc_pic.session_init;
+
+    init->aligned_picture_width = align(enc->base.width, 64);
+    init->aligned_picture_height = align(enc->base.height, 16);
+    init->padding_width = init->aligned_picture_width - enc->base.width;
+    init->padding_height = init->aligned_picture_height - enc->base.height;
+    init->pre_encode_mode = RENC_UVD_PREENCODE_MODE_NONE;
+    init->pre_encode_chroma_enabled = false;
+
+    RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SESSION_INIT);
+    RADEON_ENC_CS(init->aligned_picture_width);
+    RADEON_ENC_CS(init->aligned_picture_height);
+    RADEON_ENC_CS(init->padding_width);
+    RADEON_ENC_CS(init->padding_height);
+    RADEON_ENC_CS(init->pre_encode_mode);
+    RADEON_ENC_CS(init->pre_encode_chroma_enabled);
+    RADEON_ENC_END();
+ }
+
+ /* Emits the LAYER_CONTROL package with a single temporal layer. */
+ static void
+ radeon_uvd_enc_layer_control(struct radeon_uvd_encoder *enc)
+ {
+    ruvd_enc_layer_control_t *layers = &enc->enc_pic.layer_ctrl;
+
+    layers->max_num_temporal_layers = 1;
+    layers->num_temporal_layers = 1;
+
+    RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_LAYER_CONTROL);
+    RADEON_ENC_CS(layers->max_num_temporal_layers);
+    RADEON_ENC_CS(layers->num_temporal_layers);
+    RADEON_ENC_END();
+ }
+
+ /* Emits the LAYER_SELECT package, always selecting temporal layer 0. */
+ static void
+ radeon_uvd_enc_layer_select(struct radeon_uvd_encoder *enc)
+ {
+    ruvd_enc_layer_select_t *select = &enc->enc_pic.layer_sel;
+
+    select->temporal_layer_index = 0;
+
+    RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_LAYER_SELECT);
+    RADEON_ENC_CS(select->temporal_layer_index);
+    RADEON_ENC_END();
+ }
+
+ /* Emits the SLICE_CONTROL package in fixed-CTB mode. The per-slice CTB
+  * count is derived from the 64-aligned width and height, and the same
+  * count is used for the slice segment. */
+ static void
+ radeon_uvd_enc_slice_control_hevc(struct radeon_uvd_encoder *enc)
+ {
+    ruvd_enc_hevc_slice_control_t *slice = &enc->enc_pic.hevc_slice_ctrl;
+
+    slice->slice_control_mode = RENC_UVD_SLICE_CONTROL_MODE_FIXED_CTBS;
+    slice->fixed_ctbs_per_slice.num_ctbs_per_slice =
+       align(enc->base.width, 64) / 64 * align(enc->base.height, 64) / 64;
+    slice->fixed_ctbs_per_slice.num_ctbs_per_slice_segment =
+       slice->fixed_ctbs_per_slice.num_ctbs_per_slice;
+
+    RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SLICE_CONTROL);
+    RADEON_ENC_CS(slice->slice_control_mode);
+    RADEON_ENC_CS(slice->fixed_ctbs_per_slice.num_ctbs_per_slice);
+    RADEON_ENC_CS(slice->fixed_ctbs_per_slice.num_ctbs_per_slice_segment);
+    RADEON_ENC_END();
+ }
+
+ /* Copies the HEVC coding-tool settings from the picture description
+  * (treated as a pipe_h265_enc_picture_desc), forces half- and quarter-pel
+  * on, and emits them as the SPEC_MISC package. */
+ static void
+ radeon_uvd_enc_spec_misc_hevc(struct radeon_uvd_encoder *enc,
+                               struct pipe_picture_desc *picture)
+ {
+    struct pipe_h265_enc_picture_desc *pic =
+       (struct pipe_h265_enc_picture_desc *) picture;
+    ruvd_enc_hevc_spec_misc_t *misc = &enc->enc_pic.hevc_spec_misc;
+
+    misc->log2_min_luma_coding_block_size_minus3 =
+       pic->seq.log2_min_luma_coding_block_size_minus3;
+    /* The firmware field is the inverse of the sequence flag. */
+    misc->amp_disabled = !pic->seq.amp_enabled_flag;
+    misc->strong_intra_smoothing_enabled =
+       pic->seq.strong_intra_smoothing_enabled_flag;
+    misc->constrained_intra_pred_flag = pic->pic.constrained_intra_pred_flag;
+    misc->cabac_init_flag = pic->slice.cabac_init_flag;
+    misc->half_pel_enabled = 1;
+    misc->quarter_pel_enabled = 1;
+
+    RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SPEC_MISC);
+    RADEON_ENC_CS(misc->log2_min_luma_coding_block_size_minus3);
+    RADEON_ENC_CS(misc->amp_disabled);
+    RADEON_ENC_CS(misc->strong_intra_smoothing_enabled);
+    RADEON_ENC_CS(misc->constrained_intra_pred_flag);
+    RADEON_ENC_CS(misc->cabac_init_flag);
+    RADEON_ENC_CS(misc->half_pel_enabled);
+    RADEON_ENC_CS(misc->quarter_pel_enabled);
+    RADEON_ENC_END();
+ }
+
+ /* Translates the pipe-level rate-control method into the firmware's
+  * method code and emits it, together with the VBV buffer level, as the
+  * RATE_CONTROL_SESSION_INIT package. */
+ static void
+ radeon_uvd_enc_rc_session_init(struct radeon_uvd_encoder *enc,
+                                struct pipe_picture_desc *picture)
+ {
+    struct pipe_h265_enc_picture_desc *pic =
+       (struct pipe_h265_enc_picture_desc *) picture;
+    ruvd_enc_rate_ctl_session_init_t *rc = &enc->enc_pic.rc_session_init;
+    uint32_t method = RENC_UVD_RATE_CONTROL_METHOD_NONE;
+
+    switch (pic->rc.rate_ctrl_method) {
+    case PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT_SKIP:
+    case PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT:
+       method = RENC_UVD_RATE_CONTROL_METHOD_CBR;
+       break;
+    case PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE_SKIP:
+    case PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE:
+       method = RENC_UVD_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR;
+       break;
+    default:
+       /* DISABLE and any unrecognised method map to "no rate control". */
+       break;
+    }
+
+    rc->rate_control_method = method;
+    rc->vbv_buffer_level = pic->rc.vbv_buf_lv;
+
+    RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_RATE_CONTROL_SESSION_INIT);
+    RADEON_ENC_CS(rc->rate_control_method);
+    RADEON_ENC_CS(rc->vbv_buffer_level);
+    RADEON_ENC_END();
+ }
+
+ /* Copies the per-layer rate-control figures (bit rates, frame rate, VBV
+  * size, per-picture bit budgets) from the picture description and emits
+  * them as the RATE_CONTROL_LAYER_INIT package. */
+ static void
+ radeon_uvd_enc_rc_layer_init(struct radeon_uvd_encoder *enc,
+                              struct pipe_picture_desc *picture)
+ {
+    struct pipe_h265_enc_picture_desc *pic =
+       (struct pipe_h265_enc_picture_desc *) picture;
+    ruvd_enc_rate_ctl_layer_init_t *layer = &enc->enc_pic.rc_layer_init;
+
+    layer->target_bit_rate = pic->rc.target_bitrate;
+    layer->peak_bit_rate = pic->rc.peak_bitrate;
+    layer->frame_rate_num = pic->rc.frame_rate_num;
+    layer->frame_rate_den = pic->rc.frame_rate_den;
+    layer->vbv_buffer_size = pic->rc.vbv_buffer_size;
+    layer->avg_target_bits_per_picture = pic->rc.target_bits_picture;
+    layer->peak_bits_per_picture_integer = pic->rc.peak_bits_picture_integer;
+    layer->peak_bits_per_picture_fractional =
+       pic->rc.peak_bits_picture_fraction;
+
+    RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_RATE_CONTROL_LAYER_INIT);
+    RADEON_ENC_CS(layer->target_bit_rate);
+    RADEON_ENC_CS(layer->peak_bit_rate);
+    RADEON_ENC_CS(layer->frame_rate_num);
+    RADEON_ENC_CS(layer->frame_rate_den);
+    RADEON_ENC_CS(layer->vbv_buffer_size);
+    RADEON_ENC_CS(layer->avg_target_bits_per_picture);
+    RADEON_ENC_CS(layer->peak_bits_per_picture_integer);
+    RADEON_ENC_CS(layer->peak_bits_per_picture_fractional);
+    RADEON_ENC_END();
+ }
+
+static void
+radeon_uvd_enc_deblocking_filter_hevc(struct radeon_uvd_encoder *enc,
+                                      struct pipe_picture_desc *picture)
+{
+   /* Only the slice sub-structure of the H.265 descriptor is consulted. */
+   struct pipe_h265_enc_picture_desc *h265 =
+      (struct pipe_h265_enc_picture_desc *) picture;
+
+   /* Cache each slice-level setting in enc_pic.hevc_deblock (the PPS and
+    * slice-header writers read it back later) and emit it right away. */
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_DEBLOCKING_FILTER);
+   RADEON_ENC_CS(enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled =
+                 h265->slice.slice_loop_filter_across_slices_enabled_flag);
+   RADEON_ENC_CS(enc->enc_pic.hevc_deblock.deblocking_filter_disabled =
+                 h265->slice.slice_deblocking_filter_disabled_flag);
+   RADEON_ENC_CS(enc->enc_pic.hevc_deblock.beta_offset_div2 =
+                 h265->slice.slice_beta_offset_div2);
+   RADEON_ENC_CS(enc->enc_pic.hevc_deblock.tc_offset_div2 =
+                 h265->slice.slice_tc_offset_div2);
+   RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cb_qp_offset =
+                 h265->slice.slice_cb_qp_offset);
+   RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cr_qp_offset =
+                 h265->slice.slice_cr_qp_offset);
+   RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_quality_params(struct radeon_uvd_encoder *enc)
+{
+   /* All three quality parameters are forced to zero; each is recorded in
+    * enc_pic.quality_params and emitted in the same step. */
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_QUALITY_PARAMS);
+   RADEON_ENC_CS(enc->enc_pic.quality_params.vbaq_mode = 0);
+   RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_sensitivity = 0);
+   RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_min_idr_interval = 0);
+   RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc)
+{ /* Writes an INSERT_NALU_BUFFER package holding a sequence parameter set. Syntax-element names in the trailing comments are the reviewer's reading of ITU-T H.265 7.3.2.2 / 7.3.3 - confirm against the spec. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_SPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; /* dword reserved for the payload size, filled in at the end */
+ int i;
+
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false); /* start code and NAL header are written verbatim */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32); /* 4-byte start code */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4201, 16); /* NAL unit header: nal_unit_type 33, nuh_layer_id 0, nuh_temporal_id_plus1 1 */
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true); /* payload bytes get 0x03 escapes from here on */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4); /* sps_video_parameter_set_id */
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.layer_ctrl.
+ max_num_temporal_layers - 1, 3); /* sps_max_sub_layers_minus1 */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* sps_temporal_id_nesting_flag */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2); /* general_profile_space */
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32); /* general_profile_compatibility_flag[0..31]: flags 1 and 2 set */
+ radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32); /* progressive_source=1, interlaced_source=0, non_packed=1, frame_only=1, then 28 reserved zero bits */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16); /* remaining 16 reserved zero bits of the general constraint block */
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+ for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2); /* sub_layer_profile_present_flag, sub_layer_level_present_flag */
+
+ if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) {
+ for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2); /* reserved_zero_2bits padding up to 8 entries */
+ }
+
+ radeon_uvd_enc_code_ue(enc, 0x0); /* sps_seq_parameter_set_id */
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.chroma_format_idc);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.session_init.aligned_picture_width); /* pic_width_in_luma_samples */
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.session_init.aligned_picture_height); /* pic_height_in_luma_samples */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* conformance_window_flag; NOTE(review): aligned sizes are signalled with no crop window, so padding looks like it becomes part of the decoded picture - confirm */
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_luma_minus8);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_chroma_minus8);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_max_poc - 4); /* log2_max_pic_order_cnt_lsb_minus4 */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* sps_sub_layer_ordering_info_present_flag */
+ radeon_uvd_enc_code_ue(enc, 1); /* sps_max_dec_pic_buffering_minus1 */
+ radeon_uvd_enc_code_ue(enc, 0x0); /* sps_max_num_reorder_pics */
+ radeon_uvd_enc_code_ue(enc, 0x0); /* sps_max_latency_increase_plus1 */
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3);
+ //Only support CTBSize 64
+ radeon_uvd_enc_code_ue(enc,
+ 6 -
+ (enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3 + 3)); /* log2_diff_max_min_luma_coding_block_size, derived from log2(64) = 6 */
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.log2_min_transform_block_size_minus2);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.
+ log2_diff_max_min_transform_block_size);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.max_transform_hierarchy_depth_inter);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.max_transform_hierarchy_depth_intra);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* scaling_list_enabled_flag */
+ radeon_uvd_enc_code_fixed_bits(enc,
+ !enc->enc_pic.hevc_spec_misc.amp_disabled,
+ 1); /* amp_enabled_flag (inverse of the stored "disabled" setting) */
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.
+ sample_adaptive_offset_enabled_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.pcm_enabled_flag, 1); /* NOTE(review): no PCM size fields follow, so this presumably must stay 0 - confirm */
+
+ radeon_uvd_enc_code_ue(enc, 1); /* num_short_term_ref_pic_sets */
+ radeon_uvd_enc_code_ue(enc, 1); /* st_ref_pic_set(0): num_negative_pics */
+ radeon_uvd_enc_code_ue(enc, 0); /* num_positive_pics */
+ radeon_uvd_enc_code_ue(enc, 0); /* delta_poc_s0_minus1[0] */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* used_by_curr_pic_s0_flag[0] */
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* long_term_ref_pics_present_flag */
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0, 1); /* sps_temporal_mvp_enabled_flag */
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ strong_intra_smoothing_enabled, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* vui_parameters_present_flag */
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* sps_extension_present_flag */
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* rbsp_stop_one_bit */
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8; /* bit count rounded up to whole bytes */
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc)
+{ /* Writes an INSERT_NALU_BUFFER package holding a picture parameter set. Syntax-element names in the trailing comments are the reviewer's reading of ITU-T H.265 7.3.2.3 - confirm against the spec. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_PPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; /* dword reserved for the payload size, filled in at the end */
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false); /* start code and NAL header are written verbatim */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32); /* 4-byte start code */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4401, 16); /* NAL unit header: nal_unit_type 34, nuh_layer_id 0, nuh_temporal_id_plus1 1 */
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ radeon_uvd_enc_code_ue(enc, 0x0); /* pps_pic_parameter_set_id */
+ radeon_uvd_enc_code_ue(enc, 0x0); /* pps_seq_parameter_set_id */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* dependent_slice_segments_enabled_flag */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4); /* output_flag_present_flag (1 bit) + num_extra_slice_header_bits (3 bits) */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* sign_data_hiding_enabled_flag */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* cabac_init_present_flag; the slice header writes cabac_init_flag for P/B slices */
+ radeon_uvd_enc_code_ue(enc, 0x0); /* num_ref_idx_l0_default_active_minus1 */
+ radeon_uvd_enc_code_ue(enc, 0x0); /* num_ref_idx_l1_default_active_minus1 */
+ radeon_uvd_enc_code_se(enc, 0x0); /* init_qp_minus26 */
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ constrained_intra_pred_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* transform_skip_enabled_flag */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* cu_qp_delta_enabled_flag */
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset); /* pps_cb_qp_offset */
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset); /* pps_cr_qp_offset */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* pps_slice_chroma_qp_offsets_present_flag */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2); /* weighted_pred_flag + weighted_bipred_flag */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* transquant_bypass_enabled_flag */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* tiles_enabled_flag */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* entropy_coding_sync_enabled_flag */
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ loop_filter_across_slices_enabled, 1); /* pps_loop_filter_across_slices_enabled_flag */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* deblocking_filter_control_present_flag */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* deblocking_filter_override_enabled_flag */
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ deblocking_filter_disabled, 1); /* pps_deblocking_filter_disabled_flag */
+
+ if (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled) { /* offsets are only present while the filter is enabled */
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.beta_offset_div2); /* pps_beta_offset_div2 */
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.tc_offset_div2); /* pps_tc_offset_div2 */
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* pps_scaling_list_data_present_flag */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* lists_modification_present_flag */
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_parallel_merge_level_minus2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2); /* slice_segment_header_extension_present_flag + pps_extension_present_flag */
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* rbsp_stop_one_bit */
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8; /* bit count rounded up to whole bytes */
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_vps_hevc(struct radeon_uvd_encoder *enc)
+{ /* Writes an INSERT_NALU_BUFFER package holding a video parameter set. Syntax-element names in the trailing comments are the reviewer's reading of ITU-T H.265 7.3.2.1 / 7.3.3 - confirm against the spec. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_VPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; /* dword reserved for the payload size, filled in at the end */
+ int i;
+
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false); /* start code and NAL header are written verbatim */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32); /* 4-byte start code */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4001, 16); /* NAL unit header: nal_unit_type 32, nuh_layer_id 0, nuh_temporal_id_plus1 1 */
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4); /* vps_video_parameter_set_id */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x3, 2); /* vps_base_layer_internal_flag + vps_base_layer_available_flag (two bits, both 1) */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6); /* vps_max_layers_minus1 */
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.layer_ctrl.
+ max_num_temporal_layers - 1, 3); /* vps_max_sub_layers_minus1 */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* vps_temporal_id_nesting_flag */
+ radeon_uvd_enc_code_fixed_bits(enc, 0xffff, 16); /* vps_reserved_0xffff_16bits */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2); /* general_profile_space */
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32); /* general_profile_compatibility_flag[0..31]: flags 1 and 2 set */
+ radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32); /* progressive_source=1, interlaced_source=0, non_packed=1, frame_only=1, then 28 reserved zero bits */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16); /* remaining 16 reserved zero bits of the general constraint block */
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+ for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2); /* sub_layer_profile_present_flag, sub_layer_level_present_flag */
+
+ if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) {
+ for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2); /* reserved_zero_2bits padding up to 8 entries */
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* vps_sub_layer_ordering_info_present_flag */
+ radeon_uvd_enc_code_ue(enc, 0x1); /* vps_max_dec_pic_buffering_minus1 */
+ radeon_uvd_enc_code_ue(enc, 0x0); /* vps_max_num_reorder_pics */
+ radeon_uvd_enc_code_ue(enc, 0x0); /* vps_max_latency_increase_plus1 */
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6); /* vps_max_layer_id */
+ radeon_uvd_enc_code_ue(enc, 0x0); /* vps_num_layer_sets_minus1 */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* vps_timing_info_present_flag */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* vps_extension_flag */
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* rbsp_stop_one_bit */
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8; /* bit count rounded up to whole bytes */
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_aud_hevc(struct radeon_uvd_encoder *enc)
+{
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+   RADEON_ENC_CS(RENC_UVD_NALU_TYPE_AUD);
+   /* One dword is reserved for the payload size and filled in at the end. */
+   uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+   unsigned int pic_type_code;
+
+   /* Start code and the 16 NAL header bits (1 + 6 + 6 + 3) go out without
+    * emulation prevention. */
+   radeon_uvd_enc_reset(enc);
+   radeon_uvd_enc_set_emulation_prevention(enc, false);
+   radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_uvd_enc_code_fixed_bits(enc, 35, 6);
+   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3);
+   radeon_uvd_enc_byte_align(enc);
+   radeon_uvd_enc_set_emulation_prevention(enc, true);
+
+   /* 3-bit picture-type code: 0 for I/IDR, 1 for P, 2 for B and for any
+    * other picture type. */
+   switch (enc->enc_pic.picture_type) {
+   case PIPE_H265_ENC_PICTURE_TYPE_I:
+   case PIPE_H265_ENC_PICTURE_TYPE_IDR:
+      pic_type_code = 0x00;
+      break;
+   case PIPE_H265_ENC_PICTURE_TYPE_P:
+      pic_type_code = 0x01;
+      break;
+   default:
+      pic_type_code = 0x02;
+      break;
+   }
+   radeon_uvd_enc_code_fixed_bits(enc, pic_type_code, 3);
+
+   /* Stop bit, then pad to a byte boundary and push out what is left. */
+   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+   radeon_uvd_enc_byte_align(enc);
+   radeon_uvd_enc_flush_headers(enc);
+
+   *size_in_bytes = (enc->bits_output + 7) / 8;
+   RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc)
+{ /* Builds the SLICE_HEADER package: header bits written by the driver, followed by an instruction list telling the consumer which bit runs to copy and where to insert its own fields. Syntax-element names are the reviewer's reading of ITU-T H.265 7.3.6.1 - confirm. */
+ uint32_t instruction[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 };
+ uint32_t num_bits[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 };
+ unsigned int inst_index = 0; /* next free slot in instruction[] / num_bits[] */
+ unsigned int bit_index = 0; /* counts flushes; used below as the number of template dwords already written */
+ unsigned int bits_copied = 0; /* value of enc->bits_output at the previous COPY instruction */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SLICE_HEADER);
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* forbidden_zero_bit */
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.nal_unit_type, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6); /* nuh_layer_id */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3); /* nuh_temporal_id_plus1 */
+
+ radeon_uvd_enc_flush_headers(enc); /* closes the current dword so the next run starts on a dword boundary */
+ bit_index++; /* NOTE(review): one dword per run is assumed; a run longer than 32 bits would occupy more - confirm this cannot happen */
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied; /* length of the run just flushed */
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_FIRST_SLICE; /* presumably the consumer inserts first_slice_segment_in_pic_flag here - confirm */
+ inst_index++;
+
+ if ((enc->enc_pic.nal_unit_type >= 16)
+ && (enc->enc_pic.nal_unit_type <= 23))
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* no_output_of_prior_pics_flag, present only for NAL types 16..23 */
+
+ radeon_uvd_enc_code_ue(enc, 0x0); /* slice_pic_parameter_set_id */
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_SEGMENT; /* presumably slice segment addressing is filled in by the consumer - confirm */
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_DEPENDENT_SLICE_END;
+ inst_index++;
+
+ switch (enc->enc_pic.picture_type) { /* slice_type: 2 for I/IDR, 1 for P/SKIP and unknown types, 0 for B */
+ case PIPE_H265_ENC_PICTURE_TYPE_I:
+ case PIPE_H265_ENC_PICTURE_TYPE_IDR:
+ radeon_uvd_enc_code_ue(enc, 0x2);
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_P:
+ case PIPE_H265_ENC_PICTURE_TYPE_SKIP:
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_B:
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ break;
+ default:
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ }
+
+ if ((enc->enc_pic.nal_unit_type != 19)
+ && (enc->enc_pic.nal_unit_type != 20)) { /* NAL types 19 and 20 carry no POC / reference-set fields */
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.frame_num %
+ enc->enc_pic.max_poc,
+ enc->enc_pic.log2_max_poc); /* slice_pic_order_cnt_lsb */
+ if (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* short_term_ref_pic_set_sps_flag = 1 */
+ else {
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* short_term_ref_pic_set_sps_flag = 0: a set is coded inline */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* inter_ref_pic_set_prediction_flag */
+ radeon_uvd_enc_code_ue(enc, 0x0); /* num_negative_pics */
+ radeon_uvd_enc_code_ue(enc, 0x0); /* num_positive_pics */
+ }
+ }
+
+ if ((enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P) ||
+ (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_B)) {
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* num_ref_idx_active_override_flag */
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ cabac_init_flag, 1);
+ radeon_uvd_enc_code_ue(enc, 5 - enc->enc_pic.max_num_merge_cand); /* five_minus_max_num_merge_cand */
+ }
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_QP_DELTA; /* presumably slice_qp_delta is supplied by the consumer - confirm */
+ inst_index++;
+
+ if ((enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled) &&
+ (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled)) {
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ loop_filter_across_slices_enabled, 1); /* slice_loop_filter_across_slices_enabled_flag */
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+ }
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_END;
+
+ for (int i = bit_index;
+ i < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++)
+ RADEON_ENC_CS(0x00000000); /* zero-fill the rest of the fixed-size bit template */
+
+ for (int j = 0; j < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS;
+ j++) {
+ RADEON_ENC_CS(instruction[j]); /* unused slots stay 0 from the array initialisers */
+ RADEON_ENC_CS(num_bits[j]);
+ }
+
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_ctx(struct radeon_uvd_encoder *enc)
+{
+   struct si_screen *sscreen = (struct si_screen *) enc->screen;
+
+   /* Pitch of the reconstructed planes = row length times bpe; the row
+    * length is stored in a different place from GFX9 onwards. */
+   if (sscreen->info.chip_class >= GFX9) {
+      enc->enc_pic.ctx_buf.rec_luma_pitch =
+         enc->luma->u.gfx9.surf_pitch * enc->luma->bpe;
+      enc->enc_pic.ctx_buf.rec_chroma_pitch =
+         enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe;
+   }
+   else {
+      enc->enc_pic.ctx_buf.rec_luma_pitch =
+         (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe);
+      enc->enc_pic.ctx_buf.rec_chroma_pitch =
+         (enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe);
+   }
+   enc->enc_pic.ctx_buf.swizzle_mode = 0;
+   enc->enc_pic.ctx_buf.num_reconstructed_pictures = 2;
+
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_ENCODE_CONTEXT_BUFFER);
+   RADEON_ENC_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0);
+   RADEON_ENC_CS(0x00000000);   // reserved
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.swizzle_mode);
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch);
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch);
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.num_reconstructed_pictures);
+
+   /* Offsets of the two reconstructed pictures inside the context buffer,
+    * in the order luma 1, chroma 1, luma 2, chroma 2. */
+   RADEON_ENC_CS(0x00000000);
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch *
+                 align(enc->base.height, 16));
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch *
+                 align(enc->base.height, 16) * 3 / 2);
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch *
+                 align(enc->base.height, 16) * 5 / 2);
+
+   /* The package ends with 136 zero dwords. */
+   for (unsigned int remaining = 136; remaining > 0; remaining--)
+      RADEON_ENC_CS(0x00000000);
+
+   RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_bitstream(struct radeon_uvd_encoder *enc)
+{
+   /* Describes the output buffer: linear mode, the buffer address, its
+    * full size (enc->bs_size) and a data offset of zero.  Values are
+    * cached in enc_pic.bit_buf as they are emitted. */
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_VIDEO_BITSTREAM_BUFFER);
+   RADEON_ENC_CS(enc->enc_pic.bit_buf.mode = RENC_UVD_SWIZZLE_MODE_LINEAR);
+   RADEON_ENC_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, 0);
+   RADEON_ENC_CS(enc->enc_pic.bit_buf.video_bitstream_buffer_size =
+                 enc->bs_size);
+   RADEON_ENC_CS(enc->enc_pic.bit_buf.video_bitstream_data_offset = 0);
+   RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_feedback(struct radeon_uvd_encoder *enc)
+{
+   /* Describes the feedback buffer: linear mode, the buffer address and
+    * the two fixed size fields (16 and 40).  Values are cached in
+    * enc_pic.fb_buf as they are emitted. */
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_FEEDBACK_BUFFER);
+   RADEON_ENC_CS(enc->enc_pic.fb_buf.mode =
+                 RENC_UVD_FEEDBACK_BUFFER_MODE_LINEAR);
+   RADEON_ENC_WRITE(enc->fb->res->buf, enc->fb->res->domains, 0x0);
+   RADEON_ENC_CS(enc->enc_pic.fb_buf.feedback_buffer_size = 16);
+   RADEON_ENC_CS(enc->enc_pic.fb_buf.feedback_data_size = 40);
+   RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_intra_refresh(struct radeon_uvd_encoder *enc)
+{
+   /* Intra refresh is always switched off: mode NONE with a zero offset
+    * and a zero region size.  Values are cached in enc_pic.intra_ref as
+    * they are emitted. */
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INTRA_REFRESH);
+   RADEON_ENC_CS(enc->enc_pic.intra_ref.intra_refresh_mode =
+                 RENC_UVD_INTRA_REFRESH_MODE_NONE);
+   RADEON_ENC_CS(enc->enc_pic.intra_ref.offset = 0);
+   RADEON_ENC_CS(enc->enc_pic.intra_ref.region_size = 0);
+   RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_rc_per_pic(struct radeon_uvd_encoder *enc,
+                          struct pipe_picture_desc *picture)
+{
+   /* Only the rc sub-structure of the H.265 descriptor is consulted. */
+   struct pipe_h265_enc_picture_desc *h265 =
+      (struct pipe_h265_enc_picture_desc *) picture;
+
+   /* QP, filler-data and HRD settings come from the descriptor; the QP
+    * range (0..51), max AU size (0) and skip-frame (off) are fixed.
+    * Values are cached in enc_pic.rc_per_pic as they are emitted. */
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_RATE_CONTROL_PER_PICTURE);
+   RADEON_ENC_CS(enc->enc_pic.rc_per_pic.qp = h265->rc.quant_i_frames);
+   RADEON_ENC_CS(enc->enc_pic.rc_per_pic.min_qp_app = 0);
+   RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_qp_app = 51);
+   RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_au_size = 0);
+   RADEON_ENC_CS(enc->enc_pic.rc_per_pic.enabled_filler_data =
+                 h265->rc.fill_data_enable);
+   RADEON_ENC_CS(enc->enc_pic.rc_per_pic.skip_frame_enable = false);
+   RADEON_ENC_CS(enc->enc_pic.rc_per_pic.enforce_hrd = h265->rc.enforce_hrd);
+   RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_encode_params_hevc(struct radeon_uvd_encoder *enc)
+{
+   struct si_screen *sscreen = (struct si_screen *) enc->screen;
+
+   /* Translate the gallium picture type; I, IDR and anything unknown are
+    * all encoded as an I picture. */
+   switch (enc->enc_pic.picture_type) {
+   case PIPE_H265_ENC_PICTURE_TYPE_P:
+      enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_P;
+      break;
+   case PIPE_H265_ENC_PICTURE_TYPE_SKIP:
+      enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_P_SKIP;
+      break;
+   case PIPE_H265_ENC_PICTURE_TYPE_B:
+      enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_B;
+      break;
+   default:
+      enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_I;
+      break;
+   }
+
+   enc->enc_pic.enc_params.allowed_max_bitstream_size = enc->bs_size;
+   enc->enc_pic.enc_params.input_pic_swizzle_mode =
+      RENC_UVD_SWIZZLE_MODE_LINEAR;
+
+   /* Input pitches = row length times bpe; the row length is stored in a
+    * different place from GFX9 onwards. */
+   if (sscreen->info.chip_class >= GFX9) {
+      enc->enc_pic.enc_params.input_pic_luma_pitch =
+         enc->luma->u.gfx9.surf_pitch * enc->luma->bpe;
+      enc->enc_pic.enc_params.input_pic_chroma_pitch =
+         enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe;
+   }
+   else {
+      enc->enc_pic.enc_params.input_pic_luma_pitch =
+         (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe);
+      enc->enc_pic.enc_params.input_pic_chroma_pitch =
+         (enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe);
+   }
+
+   /* Two slots are used alternately: the new picture is reconstructed into
+    * frame_num % 2 and, unless it is an I picture, predicted from the
+    * other slot.  I pictures get the all-ones index. */
+   if (enc->enc_pic.enc_params.pic_type != RENC_UVD_PICTURE_TYPE_I)
+      enc->enc_pic.enc_params.reference_picture_index =
+         (enc->enc_pic.frame_num - 1) % 2;
+   else
+      enc->enc_pic.enc_params.reference_picture_index = 0xFFFFFFFF;
+
+   enc->enc_pic.enc_params.reconstructed_picture_index =
+      enc->enc_pic.frame_num % 2;
+
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_ENCODE_PARAMS);
+   RADEON_ENC_CS(enc->enc_pic.enc_params.pic_type);
+   RADEON_ENC_CS(enc->enc_pic.enc_params.allowed_max_bitstream_size);
+
+   /* Addresses of the input luma plane, then the chroma plane. */
+   if (sscreen->info.chip_class >= GFX9) {
+      RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+                      enc->luma->u.gfx9.surf_offset);
+      RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+                      enc->chroma->u.gfx9.surf_offset);
+   }
+   else {
+      RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+                      enc->luma->u.legacy.level[0].offset);
+      RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+                      enc->chroma->u.legacy.level[0].offset);
+   }
+
+   RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_luma_pitch);
+   RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_chroma_pitch);
+   RADEON_ENC_CS(0x00000000);   // reserved
+   RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_swizzle_mode);
+   RADEON_ENC_CS(enc->enc_pic.enc_params.reference_picture_index);
+   RADEON_ENC_CS(enc->enc_pic.enc_params.reconstructed_picture_index);
+   RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_op_init(struct radeon_uvd_encoder *enc)
+{ /* Appends a package that carries only the RENC_UVD_IB_OP_INITIALIZE id, with no payload dwords. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INITIALIZE);
+ RADEON_ENC_END(); /* back-fills the package size and adds it to enc->total_task_size */
+}
+
+static void
+radeon_uvd_enc_op_close(struct radeon_uvd_encoder *enc)
+{ /* Appends a package that carries only the RENC_UVD_IB_OP_CLOSE_SESSION id, with no payload dwords. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_CLOSE_SESSION);
+ RADEON_ENC_END(); /* back-fills the package size and adds it to enc->total_task_size */
+}
+
+static void
+radeon_uvd_enc_op_enc(struct radeon_uvd_encoder *enc)
+{ /* Appends a package that carries only the RENC_UVD_IB_OP_ENCODE id, with no payload dwords. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_ENCODE);
+ RADEON_ENC_END(); /* back-fills the package size and adds it to enc->total_task_size */
+}
+
+static void
+radeon_uvd_enc_op_init_rc(struct radeon_uvd_encoder *enc)
+{ /* Appends a package that carries only the RENC_UVD_IB_OP_INIT_RC id, with no payload dwords. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INIT_RC);
+ RADEON_ENC_END(); /* back-fills the package size and adds it to enc->total_task_size */
+}
+
+static void
+radeon_uvd_enc_op_init_rc_vbv(struct radeon_uvd_encoder *enc)
+{ /* Appends a package that carries only the RENC_UVD_IB_OP_INIT_RC_VBV_BUFFER_LEVEL id, with no payload dwords. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INIT_RC_VBV_BUFFER_LEVEL);
+ RADEON_ENC_END(); /* back-fills the package size and adds it to enc->total_task_size */
+}
+
+static void
+radeon_uvd_enc_op_speed(struct radeon_uvd_encoder *enc)
+{ /* Appends a package that carries only the RENC_UVD_IB_OP_SET_SPEED_ENCODING_MODE id, with no payload dwords. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_SET_SPEED_ENCODING_MODE);
+ RADEON_ENC_END(); /* back-fills the package size and adds it to enc->total_task_size */
+}
+
+static void
+begin(struct radeon_uvd_encoder *enc, struct pipe_picture_desc *pic)
+{ /* Builds the session start-up task: session info, task info, INITIALIZE, then the session-wide and rate-control packages. Installed as enc->begin by radeon_uvd_enc_1_1_init(). */
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0; /* reset after session info, so that package is not counted in the task size */
+ radeon_uvd_enc_task_info(enc, enc->need_feedback); /* also reserves the dword that enc->p_task_size points at */
+ radeon_uvd_enc_op_init(enc);
+
+ radeon_uvd_enc_session_init_hevc(enc);
+ radeon_uvd_enc_slice_control_hevc(enc);
+ radeon_uvd_enc_spec_misc_hevc(enc, pic);
+ radeon_uvd_enc_deblocking_filter_hevc(enc, pic);
+
+ radeon_uvd_enc_layer_control(enc);
+ radeon_uvd_enc_rc_session_init(enc, pic);
+ radeon_uvd_enc_quality_params(enc);
+ radeon_uvd_enc_layer_select(enc); /* NOTE(review): emitted again two lines below; presumably each per-layer package needs its own layer select - confirm */
+ radeon_uvd_enc_rc_layer_init(enc, pic);
+ radeon_uvd_enc_layer_select(enc);
+ radeon_uvd_enc_rc_per_pic(enc, pic);
+ radeon_uvd_enc_op_init_rc(enc);
+ radeon_uvd_enc_op_init_rc_vbv(enc);
+ *enc->p_task_size = (enc->total_task_size); /* back-fill the task size now that every package has been added */
+}
+
+static void
+encode(struct radeon_uvd_encoder *enc)
+{ /* Builds the per-frame task: headers to insert, slice-header template, encode parameters, buffer descriptions and finally the ENCODE op. Installed as enc->encode by radeon_uvd_enc_1_1_init(). */
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0; /* reset after session info, so that package is not counted in the task size */
+ radeon_uvd_enc_task_info(enc, enc->need_feedback); /* also reserves the dword that enc->p_task_size points at */
+
+ radeon_uvd_enc_nalu_aud_hevc(enc);
+ if (enc->enc_pic.is_idr) { /* parameter sets accompany IDR pictures only */
+ radeon_uvd_enc_nalu_vps_hevc(enc);
+ radeon_uvd_enc_nalu_pps_hevc(enc); /* NOTE(review): the PPS is queued before the SPS it refers to; confirm the consumer and downstream decoders accept this order */
+ radeon_uvd_enc_nalu_sps_hevc(enc);
+ }
+ radeon_uvd_enc_slice_header_hevc(enc);
+ radeon_uvd_enc_encode_params_hevc(enc);
+
+ radeon_uvd_enc_ctx(enc);
+ radeon_uvd_enc_bitstream(enc);
+ radeon_uvd_enc_feedback(enc);
+ radeon_uvd_enc_intra_refresh(enc);
+
+ radeon_uvd_enc_op_speed(enc);
+ radeon_uvd_enc_op_enc(enc);
+ *enc->p_task_size = (enc->total_task_size); /* back-fill the task size now that every package has been added */
+}
+
+static void
+destroy(struct radeon_uvd_encoder *enc)
+{ /* Builds the tear-down task: session info, task info and the CLOSE_SESSION op. Installed as enc->destroy by radeon_uvd_enc_1_1_init(). */
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0; /* reset after session info, so that package is not counted in the task size */
+ radeon_uvd_enc_task_info(enc, enc->need_feedback); /* also reserves the dword that enc->p_task_size points at */
+ radeon_uvd_enc_op_close(enc);
+ *enc->p_task_size = (enc->total_task_size); /* back-fill the task size */
+}
+
+void
+radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc)
+{ /* Only non-static function visible in this part of the file: installs this file's begin/encode/destroy IB builders as the encoder's callbacks. */
+ enc->begin = begin;
+ enc->encode = encode;
+ enc->destroy = destroy;
+}
--
2.7.4
Boyuan Zhang
2018-02-07 22:42:04 UTC
Permalink
Better to add it to Makefile.source and Meson in this patch. Other than
this,
Post by James Zhu
Implement required IBs for UVD HEVC encode.
---
src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 1115 +++++++++++++++++++++++
1 file changed, 1115 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
new file mode 100644
index 0000000..17a39c2
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
@@ -0,0 +1,1115 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+#include "radeonsi/si_pipe.h"
+#include "radeon_video.h"
+#include "radeon_uvd_enc.h"
+
+#define RADEON_ENC_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value)) /* appends one dword to the command stream; needs a variable named enc in scope */
+#define RADEON_ENC_BEGIN(cmd) { \
+ uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
+RADEON_ENC_CS(cmd) /* opens a brace scope, reserves the size dword (begin) and writes the package id; must be paired with RADEON_ENC_END() */
+#define RADEON_ENC_READ(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
+#define RADEON_ENC_WRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
+#define RADEON_ENC_READWRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
+#define RADEON_ENC_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; \
+ enc->total_task_size += *begin;} /* stores the package size in bytes (dword count * 4), adds it to the running task size and closes the scope opened by RADEON_ENC_BEGIN */
+
+static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 }; /* NOTE(review): not referenced in the parts of this file visible here - confirm it is still needed */
+static const unsigned index_to_shifts[4] = { 24, 16, 8, 0 }; /* left shift for byte 0..3 of a dword: bytes are packed most significant first */
+
+static void
+radeon_uvd_enc_add_buffer(struct radeon_uvd_encoder *enc,
+                          struct pb_buffer *buf, enum radeon_bo_usage usage,
+                          enum radeon_bo_domain domain, signed offset)
+{
+   uint64_t gpu_address;
+
+   /* Register the buffer with the command stream first ... */
+   enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
+                          domain, RADEON_PRIO_VCE);
+
+   /* ... then emit its virtual address plus the offset as two dwords,
+    * upper half first. */
+   gpu_address = enc->ws->buffer_get_virtual_address(buf);
+   gpu_address += offset;
+   RADEON_ENC_CS(gpu_address >> 32);
+   RADEON_ENC_CS(gpu_address);
+}
+
+static void
+radeon_uvd_enc_set_emulation_prevention(struct radeon_uvd_encoder *enc,
+                                        bool set)
+{
+   /* Nothing to do when the requested state is already active. */
+   if (set == enc->emulation_prevention)
+      return;
+
+   /* A state change restarts the count of consecutive zero bytes. */
+   enc->num_zeros = 0;
+   enc->emulation_prevention = set;
+}
+
+static void
+radeon_uvd_enc_output_one_byte(struct radeon_uvd_encoder *enc,
+                               unsigned char byte)
+{
+   /* The dword currently being filled; cdw only moves on once it is full. */
+   uint32_t *dword = &enc->cs->current.buf[enc->cs->current.cdw];
+
+   /* Starting a fresh dword: clear whatever was there before OR-ing. */
+   if (enc->byte_index == 0)
+      *dword = 0;
+
+   /* Bytes are placed from the most significant position downwards. */
+   *dword |= ((unsigned int) (byte) << index_to_shifts[enc->byte_index]);
+   enc->byte_index++;
+
+   if (enc->byte_index >= 4) {
+      enc->cs->current.cdw++;
+      enc->byte_index = 0;
+   }
+}
+
+static void
+radeon_uvd_enc_emulation_prevention(struct radeon_uvd_encoder *enc,
+                                    unsigned char byte)
+{
+   if (!enc->emulation_prevention)
+      return;
+
+   /* After two or more zero bytes, a following 0x00, 0x01 or 0x03 must be
+    * preceded by an escape byte 0x03. */
+   if (enc->num_zeros >= 2) {
+      switch (byte) {
+      case 0x00:
+      case 0x01:
+      case 0x03:
+         radeon_uvd_enc_output_one_byte(enc, 0x03);
+         enc->bits_output += 8;
+         enc->num_zeros = 0;
+         break;
+      default:
+         break;
+      }
+   }
+
+   /* Track the run of zero bytes ending at the byte about to be written. */
+   if (byte == 0)
+      enc->num_zeros = enc->num_zeros + 1;
+   else
+      enc->num_zeros = 0;
+}
+
+static void
+radeon_uvd_enc_code_fixed_bits(struct radeon_uvd_encoder *enc,
+                               unsigned int value, unsigned int num_bits)
+{
+   /* Appends the num_bits low-order bits of value, most significant bit
+    * first.  Bits collect left-aligned in a 32-bit shifter and leave it
+    * one whole byte at a time. */
+   while (num_bits > 0) {
+      unsigned int pending = value & (0xffffffff >> (32 - num_bits));
+      unsigned int room = 32 - enc->bits_in_shifter;
+      unsigned int chunk = num_bits;
+
+      if (chunk > room) {
+         /* Not everything fits: take the topmost 'room' bits this round,
+          * the remainder is handled by the next loop iteration. */
+         chunk = room;
+         pending = pending >> (num_bits - chunk);
+      }
+
+      enc->shifter |= pending << (room - chunk);
+      enc->bits_in_shifter += chunk;
+      num_bits -= chunk;
+
+      /* Drain every complete byte from the top of the shifter. */
+      while (enc->bits_in_shifter >= 8) {
+         unsigned char top = (unsigned char) (enc->shifter >> 24);
+
+         enc->shifter <<= 8;
+         radeon_uvd_enc_emulation_prevention(enc, top);
+         radeon_uvd_enc_output_one_byte(enc, top);
+         enc->bits_in_shifter -= 8;
+         enc->bits_output += 8;
+      }
+   }
+}
+
+static void
+radeon_uvd_enc_reset(struct radeon_uvd_encoder *enc)
+{
+   /* Bit accumulator and its counters. */
+   enc->shifter = 0;
+   enc->bits_in_shifter = 0;
+   enc->bits_output = 0;
+
+   /* Position inside the dword currently being filled. */
+   enc->byte_index = 0;
+
+   /* Emulation prevention starts out disabled with an empty zero run. */
+   enc->num_zeros = 0;
+   enc->emulation_prevention = false;
+}
+
+static void
+radeon_uvd_enc_byte_align(struct radeon_uvd_encoder *enc)
+{
+   /* Number of zero bits needed to reach the next byte boundary. */
+   unsigned int pad_bits = (32 - enc->bits_in_shifter) % 8;
+
+   if (pad_bits == 0)
+      return;
+
+   radeon_uvd_enc_code_fixed_bits(enc, 0, pad_bits);
+}
+
+static void
+radeon_uvd_enc_flush_headers(struct radeon_uvd_encoder *enc)
+{
+   /* Push out a trailing partial byte, if there is one. */
+   if (enc->bits_in_shifter != 0) {
+      /* The leftover bits sit left-aligned in the shifter's top byte. */
+      unsigned char tail = (unsigned char) (enc->shifter >> 24);
+
+      radeon_uvd_enc_emulation_prevention(enc, tail);
+      radeon_uvd_enc_output_one_byte(enc, tail);
+
+      /* Only the bits really written are counted, not the zero filler. */
+      enc->bits_output += enc->bits_in_shifter;
+      enc->bits_in_shifter = 0;
+      enc->shifter = 0;
+      enc->num_zeros = 0;
+   }
+
+   /* Close a partly filled dword so later writes start on a fresh one. */
+   if (enc->byte_index > 0) {
+      enc->byte_index = 0;
+      enc->cs->current.cdw++;
+   }
+}
+
+static void
+radeon_uvd_enc_code_ue(struct radeon_uvd_encoder *enc, unsigned int value)
+{
+   /* Writes value as an unsigned Exp-Golomb code: prefix_len zero bits, a
+    * single one bit, then the prefix_len low-order bits of value + 1,
+    * where prefix_len = floor(log2(value + 1)).
+    *
+    * The sum is formed in 64 bits so that value == UINT_MAX cannot wrap
+    * to zero. */
+   uint64_t code_num = (uint64_t) value + 1;
+   unsigned int prefix_len = 0;
+
+   while ((code_num >> (prefix_len + 1)) != 0)
+      prefix_len++;
+
+   /* The code is emitted in three pieces so that no single call asks
+    * radeon_uvd_enc_code_fixed_bits() for more than 32 bits, which is the
+    * most its mask computation can handle.  The pieces are appended back
+    * to back, so the resulting bit string is that of one 2 * prefix_len + 1
+    * bit code.  A zero-length piece is a no-op. */
+   radeon_uvd_enc_code_fixed_bits(enc, 0, prefix_len);
+   radeon_uvd_enc_code_fixed_bits(enc, 1, 1);
+   radeon_uvd_enc_code_fixed_bits(enc, (unsigned int) code_num, prefix_len);
+}
+
+static void
+radeon_uvd_enc_code_se(struct radeon_uvd_encoder *enc, int value)
+{
+   /* Signed Exp-Golomb mapping onto the unsigned code:
+    * 0 -> 0, positive k -> 2k - 1, negative k -> 2|k|. */
+   unsigned int mapped;
+
+   if (value > 0)
+      mapped = ((unsigned int) (value) << 1) - 1;
+   else if (value < 0)
+      mapped = (unsigned int) (0 - value) << 1;
+   else
+      mapped = 0;
+
+   radeon_uvd_enc_code_ue(enc, mapped);
+}
+
+/*
+ * Emit the SESSION_INFO block: a reserved dword, the firmware interface
+ * version (major and minor packed into one dword) and the session info
+ * buffer added as a read/write buffer.
+ */
+static void
+radeon_uvd_enc_session_info(struct radeon_uvd_encoder *enc)
+{
+   unsigned int major_bits =
+      RENC_UVD_FW_INTERFACE_MAJOR_VERSION << RENC_UVD_IF_MAJOR_VERSION_SHIFT;
+   unsigned int minor_bits =
+      RENC_UVD_FW_INTERFACE_MINOR_VERSION << RENC_UVD_IF_MINOR_VERSION_SHIFT;
+
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SESSION_INFO);
+   RADEON_ENC_CS(0x00000000);   /* reserved */
+   RADEON_ENC_CS(major_bits | minor_bits);
+   RADEON_ENC_READWRITE(enc->si->res->buf, enc->si->res->domains, 0x0);
+   RADEON_ENC_END();
+}
+
+/*
+ * Start a new task: bump the task id, record whether one feedback is
+ * allowed and emit the TASK_INFO block. The dword right after the block
+ * header is skipped and its address kept in enc->p_task_size, so the total
+ * task size can be stored there once it is known.
+ */
+static void
+radeon_uvd_enc_task_info(struct radeon_uvd_encoder *enc, bool need_feedback)
+{
+   enc->enc_pic.task_info.allowed_max_num_feedbacks = need_feedback ? 1 : 0;
+   enc->enc_pic.task_info.task_id++;
+
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_TASK_INFO);
+   /* Placeholder dword for the task size. */
+   enc->p_task_size = &enc->cs->current.buf[enc->cs->current.cdw];
+   enc->cs->current.cdw++;
+   RADEON_ENC_CS(enc->enc_pic.task_info.task_id);
+   RADEON_ENC_CS(enc->enc_pic.task_info.allowed_max_num_feedbacks);
+   RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_session_init_hevc(struct radeon_uvd_encoder *enc)
+{ /* Derives the session_init parameters from enc->base.width/height and
+   * emits them as a SESSION_INIT block. The width is aligned to 64 and the
+   * height to 16; the padding fields are the amounts added by that
+   * alignment. Pre-encode is switched off. */
+ enc->enc_pic.session_init.aligned_picture_width =
+ align(enc->base.width, 64);
+ enc->enc_pic.session_init.aligned_picture_height =
+ align(enc->base.height, 16);
+ enc->enc_pic.session_init.padding_width =
+ enc->enc_pic.session_init.aligned_picture_width - enc->base.width;
+ enc->enc_pic.session_init.padding_height =
+ enc->enc_pic.session_init.aligned_picture_height - enc->base.height;
+ enc->enc_pic.session_init.pre_encode_mode = RENC_UVD_PREENCODE_MODE_NONE;
+ enc->enc_pic.session_init.pre_encode_chroma_enabled = false;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SESSION_INIT);
+ RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_width);
+ RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_height);
+ RADEON_ENC_CS(enc->enc_pic.session_init.padding_width);
+ RADEON_ENC_CS(enc->enc_pic.session_init.padding_height);
+ RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_mode);
+ RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_chroma_enabled);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_layer_control(struct radeon_uvd_encoder *enc)
+{ /* Emits the LAYER_CONTROL block with both the maximum and the current
+   * number of temporal layers fixed to 1. */
+ enc->enc_pic.layer_ctrl.max_num_temporal_layers = 1;
+ enc->enc_pic.layer_ctrl.num_temporal_layers = 1;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_LAYER_CONTROL);
+ RADEON_ENC_CS(enc->enc_pic.layer_ctrl.max_num_temporal_layers);
+ RADEON_ENC_CS(enc->enc_pic.layer_ctrl.num_temporal_layers);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_layer_select(struct radeon_uvd_encoder *enc)
+{ /* Emits the LAYER_SELECT block; the temporal layer index is always 0. */
+ enc->enc_pic.layer_sel.temporal_layer_index = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_LAYER_SELECT);
+ RADEON_ENC_CS(enc->enc_pic.layer_sel.temporal_layer_index);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_slice_control_hevc(struct radeon_uvd_encoder *enc)
+{ /* Emits the SLICE_CONTROL block in fixed-CTBs mode. Both CTB counts are
+   * set to the number of 64x64 units covering the 64-aligned picture. The
+   * product below is evaluated left to right as ((w / 64) * h) / 64, which
+   * is exact because the aligned height is a multiple of 64.
+   * NOTE(review): presumably this makes every picture one slice with one
+   * slice segment - confirm against the firmware interface. */
+ enc->enc_pic.hevc_slice_ctrl.slice_control_mode =
+ RENC_UVD_SLICE_CONTROL_MODE_FIXED_CTBS;
+ enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice =
+ align(enc->base.width, 64) / 64 * align(enc->base.height, 64) / 64;
+ enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.
+ num_ctbs_per_slice_segment =
+ enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SLICE_CONTROL);
+ RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.slice_control_mode);
+ RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.
+ num_ctbs_per_slice);
+ RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.
+ num_ctbs_per_slice_segment);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_spec_misc_hevc(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{ /* Copies codec tool settings from the picture description (accessed as
+   * a struct pipe_h265_enc_picture_desc) into enc->enc_pic.hevc_spec_misc
+   * and emits them as a SPEC_MISC block. amp_disabled is the inverse of
+   * the sequence-level amp_enabled_flag; half-pel and quarter-pel are
+   * always enabled. */
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture;
+ enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3 =
+ pic->seq.log2_min_luma_coding_block_size_minus3;
+ enc->enc_pic.hevc_spec_misc.amp_disabled = !pic->seq.amp_enabled_flag;
+ enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled =
+ pic->seq.strong_intra_smoothing_enabled_flag;
+ enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag =
+ pic->pic.constrained_intra_pred_flag;
+ enc->enc_pic.hevc_spec_misc.cabac_init_flag = pic->slice.cabac_init_flag;
+ enc->enc_pic.hevc_spec_misc.half_pel_enabled = 1;
+ enc->enc_pic.hevc_spec_misc.quarter_pel_enabled = 1;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SPEC_MISC);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.amp_disabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.cabac_init_flag);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.half_pel_enabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.quarter_pel_enabled);
+ RADEON_ENC_END();
+}
+
+/*
+ * Translate the rate-control method of the picture into the firmware's
+ * RATE_CONTROL_SESSION_INIT block.
+ *
+ * enc:     encoder whose command stream receives the block
+ * picture: picture description; it is accessed as a
+ *          struct pipe_h265_enc_picture_desc
+ *
+ * Both "constant" methods select CBR, both "variable" methods select
+ * peak-constrained VBR, and anything else disables rate control.
+ */
+static void
+radeon_uvd_enc_rc_session_init(struct radeon_uvd_encoder *enc,
+                               struct pipe_picture_desc *picture)
+{
+   struct pipe_h265_enc_picture_desc *pic =
+      (struct pipe_h265_enc_picture_desc *) picture;
+
+   enc->enc_pic.rc_session_init.vbv_buffer_level = pic->rc.vbv_buf_lv;
+
+   /* Without case labels none of the assignments below can be reached. */
+   switch (pic->rc.rate_ctrl_method) {
+   case PIPE_H265_ENC_RATE_CONTROL_METHOD_DISABLE:
+      enc->enc_pic.rc_session_init.rate_control_method =
+         RENC_UVD_RATE_CONTROL_METHOD_NONE;
+      break;
+   case PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT_SKIP:
+   case PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT:
+      enc->enc_pic.rc_session_init.rate_control_method =
+         RENC_UVD_RATE_CONTROL_METHOD_CBR;
+      break;
+   case PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE_SKIP:
+   case PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE:
+      enc->enc_pic.rc_session_init.rate_control_method =
+         RENC_UVD_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR;
+      break;
+   default:
+      enc->enc_pic.rc_session_init.rate_control_method =
+         RENC_UVD_RATE_CONTROL_METHOD_NONE;
+   }
+
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_RATE_CONTROL_SESSION_INIT);
+   RADEON_ENC_CS(enc->enc_pic.rc_session_init.rate_control_method);
+   RADEON_ENC_CS(enc->enc_pic.rc_session_init.vbv_buffer_level);
+   RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_rc_layer_init(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{ /* Copies the rate-control figures (bit rates, frame rate, VBV buffer
+   * size, per-picture bit budgets) from the picture description, accessed
+   * as a struct pipe_h265_enc_picture_desc, into enc->enc_pic.rc_layer_init
+   * and emits them as a RATE_CONTROL_LAYER_INIT block. */
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture;
+ enc->enc_pic.rc_layer_init.target_bit_rate = pic->rc.target_bitrate;
+ enc->enc_pic.rc_layer_init.peak_bit_rate = pic->rc.peak_bitrate;
+ enc->enc_pic.rc_layer_init.frame_rate_num = pic->rc.frame_rate_num;
+ enc->enc_pic.rc_layer_init.frame_rate_den = pic->rc.frame_rate_den;
+ enc->enc_pic.rc_layer_init.vbv_buffer_size = pic->rc.vbv_buffer_size;
+ enc->enc_pic.rc_layer_init.avg_target_bits_per_picture =
+ pic->rc.target_bits_picture;
+ enc->enc_pic.rc_layer_init.peak_bits_per_picture_integer =
+ pic->rc.peak_bits_picture_integer;
+ enc->enc_pic.rc_layer_init.peak_bits_per_picture_fractional =
+ pic->rc.peak_bits_picture_fraction;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_RATE_CONTROL_LAYER_INIT);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.target_bit_rate);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bit_rate);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.frame_rate_num);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.frame_rate_den);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.vbv_buffer_size);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.avg_target_bits_per_picture);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bits_per_picture_integer);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bits_per_picture_fractional);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_deblocking_filter_hevc(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{ /* Copies the slice-level deblocking settings and the Cb/Cr QP offsets
+   * from the picture description (accessed as a
+   * struct pipe_h265_enc_picture_desc) into enc->enc_pic.hevc_deblock and
+   * emits them as a DEBLOCKING_FILTER block. */
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture;
+ enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled =
+ pic->slice.slice_loop_filter_across_slices_enabled_flag;
+ enc->enc_pic.hevc_deblock.deblocking_filter_disabled =
+ pic->slice.slice_deblocking_filter_disabled_flag;
+ enc->enc_pic.hevc_deblock.beta_offset_div2 =
+ pic->slice.slice_beta_offset_div2;
+ enc->enc_pic.hevc_deblock.tc_offset_div2 = pic->slice.slice_tc_offset_div2;
+ enc->enc_pic.hevc_deblock.cb_qp_offset = pic->slice.slice_cb_qp_offset;
+ enc->enc_pic.hevc_deblock.cr_qp_offset = pic->slice.slice_cr_qp_offset;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_DEBLOCKING_FILTER);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.deblocking_filter_disabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.beta_offset_div2);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.tc_offset_div2);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cb_qp_offset);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cr_qp_offset);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_quality_params(struct radeon_uvd_encoder *enc)
+{ /* Emits the QUALITY_PARAMS block with all three fields (vbaq_mode,
+   * scene_change_sensitivity, scene_change_min_idr_interval) set to 0. */
+ enc->enc_pic.quality_params.vbaq_mode = 0;
+ enc->enc_pic.quality_params.scene_change_sensitivity = 0;
+ enc->enc_pic.quality_params.scene_change_min_idr_interval = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_QUALITY_PARAMS);
+ RADEON_ENC_CS(enc->enc_pic.quality_params.vbaq_mode);
+ RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_sensitivity);
+ RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_min_idr_interval);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc)
+{ /* Emits an INSERT_NALU_BUFFER block of type SPS. The NAL unit is built
+   * bit by bit with the header packer; the dword following the NALU type is
+   * reserved first and receives the byte count at the end.
+   * NOTE(review): the field order presumably follows the H.265 sequence
+   * parameter set syntax; the meaning of the literal values is not stated
+   * in this file - confirm against the specification. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_SPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ int i;
+
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false); /* prefix and header are written raw */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32); /* bytes 00 00 00 01 */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4201, 16); /* split as 1/6/6/3 bits: 0, 33, 0, 1 */
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true); /* payload from here on */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.layer_ctrl.
+ max_num_temporal_layers - 1, 3);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+ for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) {
+ for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ }
+
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.chroma_format_idc);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.session_init.aligned_picture_width);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.session_init.aligned_picture_height);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_luma_minus8);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_chroma_minus8);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_max_poc - 4);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3);
+ //Only support CTBSize 64
+ radeon_uvd_enc_code_ue(enc,
+ 6 -
+ (enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3 + 3));
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.log2_min_transform_block_size_minus2);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.
+ log2_diff_max_min_transform_block_size);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.max_transform_hierarchy_depth_inter);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.max_transform_hierarchy_depth_intra);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ !enc->enc_pic.hevc_spec_misc.amp_disabled,
+ 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.
+ sample_adaptive_offset_enabled_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.pcm_enabled_flag, 1);
+
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 0);
+ radeon_uvd_enc_code_ue(enc, 0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ strong_intra_smoothing_enabled, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* closing 1 bit, then zero padding to a byte */
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8; /* bits written, rounded up to bytes */
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc)
+{ /* Emits an INSERT_NALU_BUFFER block of type PPS. The NAL unit is built
+   * bit by bit with the header packer; the dword following the NALU type is
+   * reserved first and receives the byte count at the end. The two
+   * deblocking offsets are only written when deblocking is not disabled.
+   * NOTE(review): the field order presumably follows the H.265 picture
+   * parameter set syntax - confirm against the specification. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_PPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false); /* prefix and header are written raw */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32); /* bytes 00 00 00 01 */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4401, 16); /* split as 1/6/6/3 bits: 0, 34, 0, 1 */
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true); /* payload from here on */
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_se(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ constrained_intra_pred_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ loop_filter_across_slices_enabled, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ deblocking_filter_disabled, 1);
+
+ if (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled) {
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.beta_offset_div2);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.tc_offset_div2);
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_parallel_merge_level_minus2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* closing 1 bit, then zero padding to a byte */
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8; /* bits written, rounded up to bytes */
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_vps_hevc(struct radeon_uvd_encoder *enc)
+{ /* Emits an INSERT_NALU_BUFFER block of type VPS. The NAL unit is built
+   * bit by bit with the header packer; the dword following the NALU type is
+   * reserved first and receives the byte count at the end.
+   * NOTE(review): the field order presumably follows the H.265 video
+   * parameter set syntax - confirm against the specification. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_VPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ int i;
+
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false); /* prefix and header are written raw */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32); /* bytes 00 00 00 01 */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4001, 16); /* split as 1/6/6/3 bits: 0, 32, 0, 1 */
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true); /* payload from here on */
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x3, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.layer_ctrl.
+ max_num_temporal_layers - 1, 3);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xffff, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+ for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) {
+ for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* closing 1 bit, then zero padding to a byte */
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8; /* bits written, rounded up to bytes */
+ RADEON_ENC_END();
+}
+
+/*
+ * Emit an INSERT_NALU_BUFFER block of type AUD.
+ *
+ * The NAL unit consists of the 00 00 00 01 prefix, a 16-bit header written
+ * as four fields (1 bit 0, 6 bits 35, 6 bits 0, 3 bits 1), a 3-bit code
+ * chosen from the picture type, and a closing 1 bit padded with zeros to a
+ * byte boundary. The dword following the NALU type is reserved first and
+ * receives the byte count at the end.
+ *
+ * Picture type code: 0 for I and IDR pictures, 1 for P pictures, 2 for B
+ * pictures and for any other value.
+ */
+static void
+radeon_uvd_enc_nalu_aud_hevc(struct radeon_uvd_encoder *enc)
+{
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+   RADEON_ENC_CS(RENC_UVD_NALU_TYPE_AUD);
+   uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+
+   radeon_uvd_enc_reset(enc);
+
+   /* Prefix and header are written without emulation prevention. */
+   radeon_uvd_enc_set_emulation_prevention(enc, false);
+   radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_uvd_enc_code_fixed_bits(enc, 35, 6);
+   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3);
+   radeon_uvd_enc_byte_align(enc);
+   radeon_uvd_enc_set_emulation_prevention(enc, true);
+
+   /* Without case labels none of the writes below can be reached. */
+   switch (enc->enc_pic.picture_type) {
+   case PIPE_H265_ENC_PICTURE_TYPE_I:
+   case PIPE_H265_ENC_PICTURE_TYPE_IDR:
+      radeon_uvd_enc_code_fixed_bits(enc, 0x00, 3);
+      break;
+   case PIPE_H265_ENC_PICTURE_TYPE_P:
+      radeon_uvd_enc_code_fixed_bits(enc, 0x01, 3);
+      break;
+   case PIPE_H265_ENC_PICTURE_TYPE_B:
+      radeon_uvd_enc_code_fixed_bits(enc, 0x02, 3);
+      break;
+   default:
+      radeon_uvd_enc_code_fixed_bits(enc, 0x02, 3);
+   }
+
+   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+   radeon_uvd_enc_byte_align(enc);
+   radeon_uvd_enc_flush_headers(enc);
+   *size_in_bytes = (enc->bits_output + 7) / 8;
+   RADEON_ENC_END();
+}
+
+/*
+ * Emit the SLICE_HEADER block: a slice header template made of literal bit
+ * runs plus a list of instructions.
+ *
+ * Literal bits are written with the header packer and flushed into the
+ * command stream; each flush is recorded as a COPY instruction whose
+ * num_bits entry is the number of bits written since the previous COPY.
+ * Between the runs, FIRST_SLICE, SLICE_SEGMENT, DEPENDENT_SLICE_END and
+ * SLICE_QP_DELTA instructions are recorded (presumably fields supplied by
+ * the firmware - confirm against the firmware interface). The list is
+ * closed with an END instruction.
+ *
+ * After the literal bits the template area is padded with zero dwords up to
+ * RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS, counting one
+ * dword per flush, and then all instruction/num_bits pairs are emitted.
+ *
+ * Slice type code: 2 for I and IDR pictures, 1 for P and SKIP pictures,
+ * 0 for B pictures, 1 for any other value.
+ */
+static void
+radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc)
+{
+   uint32_t instruction[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 };
+   uint32_t num_bits[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 };
+   unsigned int inst_index = 0;
+   unsigned int bit_index = 0;     /* number of flushes so far */
+   unsigned int bits_copied = 0;   /* bits already covered by COPY entries */
+
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SLICE_HEADER);
+   radeon_uvd_enc_reset(enc);
+   radeon_uvd_enc_set_emulation_prevention(enc, false);
+
+   /* 16-bit header: 1 bit 0, 6 bits NAL unit type, 6 bits 0, 3 bits 1. */
+   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.nal_unit_type, 6);
+   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3);
+
+   radeon_uvd_enc_flush_headers(enc);
+   bit_index++;
+   instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+   num_bits[inst_index] = enc->bits_output - bits_copied;
+   bits_copied = enc->bits_output;
+   inst_index++;
+
+   instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_FIRST_SLICE;
+   inst_index++;
+
+   /* One extra zero bit for NAL unit types 16..23. */
+   if ((enc->enc_pic.nal_unit_type >= 16)
+       && (enc->enc_pic.nal_unit_type <= 23))
+      radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+   radeon_uvd_enc_code_ue(enc, 0x0);
+
+   radeon_uvd_enc_flush_headers(enc);
+   bit_index++;
+   instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+   num_bits[inst_index] = enc->bits_output - bits_copied;
+   bits_copied = enc->bits_output;
+   inst_index++;
+
+   instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_SEGMENT;
+   inst_index++;
+
+   instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_DEPENDENT_SLICE_END;
+   inst_index++;
+
+   /* Without case labels the slice type would never be written. */
+   switch (enc->enc_pic.picture_type) {
+   case PIPE_H265_ENC_PICTURE_TYPE_I:
+   case PIPE_H265_ENC_PICTURE_TYPE_IDR:
+      radeon_uvd_enc_code_ue(enc, 0x2);
+      break;
+   case PIPE_H265_ENC_PICTURE_TYPE_P:
+   case PIPE_H265_ENC_PICTURE_TYPE_SKIP:
+      radeon_uvd_enc_code_ue(enc, 0x1);
+      break;
+   case PIPE_H265_ENC_PICTURE_TYPE_B:
+      radeon_uvd_enc_code_ue(enc, 0x0);
+      break;
+   default:
+      radeon_uvd_enc_code_ue(enc, 0x1);
+   }
+
+   /* Picture order count and reference set; skipped for types 19 and 20. */
+   if ((enc->enc_pic.nal_unit_type != 19)
+       && (enc->enc_pic.nal_unit_type != 20)) {
+      radeon_uvd_enc_code_fixed_bits(enc,
+                                     enc->enc_pic.frame_num %
+                                     enc->enc_pic.max_poc,
+                                     enc->enc_pic.log2_max_poc);
+      if (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P)
+         radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+      else {
+         radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+         radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+         radeon_uvd_enc_code_ue(enc, 0x0);
+         radeon_uvd_enc_code_ue(enc, 0x0);
+      }
+   }
+
+   /* Fields written only for P and B pictures. */
+   if ((enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P) ||
+       (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_B)) {
+      radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_uvd_enc_code_fixed_bits(enc,
+                                     enc->enc_pic.hevc_spec_misc.
+                                     cabac_init_flag, 1);
+      radeon_uvd_enc_code_ue(enc, 5 - enc->enc_pic.max_num_merge_cand);
+   }
+
+   radeon_uvd_enc_flush_headers(enc);
+   bit_index++;
+   instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+   num_bits[inst_index] = enc->bits_output - bits_copied;
+   bits_copied = enc->bits_output;
+   inst_index++;
+
+   instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_QP_DELTA;
+   inst_index++;
+
+   /* The loop-filter flag is written only when that filter is enabled and
+    * deblocking is not disabled. */
+   if ((enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled) &&
+       (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled)) {
+      radeon_uvd_enc_code_fixed_bits(enc,
+                                     enc->enc_pic.hevc_deblock.
+                                     loop_filter_across_slices_enabled, 1);
+
+      radeon_uvd_enc_flush_headers(enc);
+      bit_index++;
+      instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+      num_bits[inst_index] = enc->bits_output - bits_copied;
+      bits_copied = enc->bits_output;
+      inst_index++;
+   }
+
+   instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_END;
+
+   /* Zero-fill the rest of the template area. */
+   for (int i = bit_index;
+        i < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++)
+      RADEON_ENC_CS(0x00000000);
+
+   /* Instruction list: one (instruction, bit count) pair per slot. */
+   for (int j = 0; j < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS;
+        j++) {
+      RADEON_ENC_CS(instruction[j]);
+      RADEON_ENC_CS(num_bits[j]);
+   }
+
+   RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_ctx(struct radeon_uvd_encoder *enc)
+{ /* Emits the ENCODE_CONTEXT_BUFFER block. The luma and chroma pitches of
+   * the reconstructed pictures are taken from the legacy surface layout
+   * for chips older than GFX9 and from the gfx9 layout otherwise. Two
+   * reconstructed pictures are declared and their luma/chroma offsets are
+   * derived from the pitches and the 16-aligned height; 136 zero dwords
+   * follow. */
+ struct si_screen *rscreen = (struct si_screen *) enc->screen;
+
+ enc->enc_pic.ctx_buf.swizzle_mode = 0;
+ if (rscreen->info.chip_class < GFX9) {
+ enc->enc_pic.ctx_buf.rec_luma_pitch =
+ (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe);
+ enc->enc_pic.ctx_buf.rec_chroma_pitch =
+ (enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe);
+ }
+ else {
+ enc->enc_pic.ctx_buf.rec_luma_pitch =
+ enc->luma->u.gfx9.surf_pitch * enc->luma->bpe;
+ enc->enc_pic.ctx_buf.rec_chroma_pitch =
+ enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe;
+ }
+ enc->enc_pic.ctx_buf.num_reconstructed_pictures = 2;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_ENCODE_CONTEXT_BUFFER);
+ RADEON_ENC_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0);
+ RADEON_ENC_CS(0x00000000); // reserved
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.swizzle_mode);
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch);
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch);
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.num_reconstructed_pictures);
+ /* reconstructed_picture_1_luma_offset */
+ RADEON_ENC_CS(0x00000000);
+ /* reconstructed_picture_1_chroma_offset */
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch *
+ align(enc->base.height, 16));
+ /* reconstructed_picture_2_luma_offset */
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch *
+ align(enc->base.height, 16) * 3 / 2);
+ /* reconstructed_picture_2_chroma_offset */
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch *
+ align(enc->base.height, 16) * 5 / 2);
+
+ for (int i = 0; i < 136; i++) /* NOTE(review): purpose of these 136 zero dwords is not stated here */
+ RADEON_ENC_CS(0x00000000);
+
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_bitstream(struct radeon_uvd_encoder *enc)
+{ /* Emits the VIDEO_BITSTREAM_BUFFER block: linear mode, the output
+   * buffer enc->bs_handle added for writing in GTT, its size enc->bs_size
+   * and a data offset of 0. */
+ enc->enc_pic.bit_buf.mode = RENC_UVD_SWIZZLE_MODE_LINEAR;
+ enc->enc_pic.bit_buf.video_bitstream_buffer_size = enc->bs_size;
+ enc->enc_pic.bit_buf.video_bitstream_data_offset = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_VIDEO_BITSTREAM_BUFFER);
+ RADEON_ENC_CS(enc->enc_pic.bit_buf.mode);
+ RADEON_ENC_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, 0);
+ RADEON_ENC_CS(enc->enc_pic.bit_buf.video_bitstream_buffer_size);
+ RADEON_ENC_CS(enc->enc_pic.bit_buf.video_bitstream_data_offset);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_feedback(struct radeon_uvd_encoder *enc)
+{ /* Emits the FEEDBACK_BUFFER block: linear mode, the feedback buffer
+   * enc->fb added for writing, and the fixed sizes 16 and 40.
+   * NOTE(review): the units of the two size fields are not stated here -
+   * confirm against the firmware interface. */
+ enc->enc_pic.fb_buf.mode = RENC_UVD_FEEDBACK_BUFFER_MODE_LINEAR;
+ enc->enc_pic.fb_buf.feedback_buffer_size = 16;
+ enc->enc_pic.fb_buf.feedback_data_size = 40;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_FEEDBACK_BUFFER);
+ RADEON_ENC_CS(enc->enc_pic.fb_buf.mode);
+ RADEON_ENC_WRITE(enc->fb->res->buf, enc->fb->res->domains, 0x0);
+ RADEON_ENC_CS(enc->enc_pic.fb_buf.feedback_buffer_size);
+ RADEON_ENC_CS(enc->enc_pic.fb_buf.feedback_data_size);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_intra_refresh(struct radeon_uvd_encoder *enc)
+{ /* Emits the INTRA_REFRESH block with intra refresh turned off (mode
+   * NONE, offset 0, region size 0). */
+ enc->enc_pic.intra_ref.intra_refresh_mode =
+ RENC_UVD_INTRA_REFRESH_MODE_NONE;
+ enc->enc_pic.intra_ref.offset = 0;
+ enc->enc_pic.intra_ref.region_size = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INTRA_REFRESH);
+ RADEON_ENC_CS(enc->enc_pic.intra_ref.intra_refresh_mode);
+ RADEON_ENC_CS(enc->enc_pic.intra_ref.offset);
+ RADEON_ENC_CS(enc->enc_pic.intra_ref.region_size);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_rc_per_pic(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{ /* Emits the RATE_CONTROL_PER_PICTURE block. The QP, the filler-data
+   * switch and the HRD switch come from the picture description (accessed
+   * as a struct pipe_h265_enc_picture_desc); the QP range is fixed to
+   * 0..51, the maximum AU size to 0 and frame skipping is off.
+   * NOTE(review): the QP is always taken from rc.quant_i_frames, whatever
+   * the picture type - confirm this is intended. */
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture;
+ enc->enc_pic.rc_per_pic.qp = pic->rc.quant_i_frames;
+ enc->enc_pic.rc_per_pic.min_qp_app = 0;
+ enc->enc_pic.rc_per_pic.max_qp_app = 51;
+ enc->enc_pic.rc_per_pic.max_au_size = 0;
+ enc->enc_pic.rc_per_pic.enabled_filler_data = pic->rc.fill_data_enable;
+ enc->enc_pic.rc_per_pic.skip_frame_enable = false;
+ enc->enc_pic.rc_per_pic.enforce_hrd = pic->rc.enforce_hrd;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_RATE_CONTROL_PER_PICTURE);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.qp);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.min_qp_app);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_qp_app);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_au_size);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.enabled_filler_data);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.skip_frame_enable);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.enforce_hrd);
+ RADEON_ENC_END();
+}
+
+/*
+ * Emit the ENCODE_PARAMS block for the current picture.
+ *
+ * The firmware picture type is derived from enc->enc_pic.picture_type:
+ * I and IDR map to I, P to P, SKIP to P_SKIP, B to B, and any other value
+ * falls back to I. The input surface pitches and offsets are read from the
+ * legacy layout for chips older than GFX9 and from the gfx9 layout
+ * otherwise; the input is declared linear.
+ *
+ * Reference and reconstructed pictures alternate between slots 0 and 1
+ * with the frame number; an I picture uses 0xFFFFFFFF as its reference
+ * index.
+ */
+static void
+radeon_uvd_enc_encode_params_hevc(struct radeon_uvd_encoder *enc)
+{
+   struct si_screen *rscreen = (struct si_screen *) enc->screen;
+
+   /* Without case labels pic_type would never be assigned. */
+   switch (enc->enc_pic.picture_type) {
+   case PIPE_H265_ENC_PICTURE_TYPE_I:
+   case PIPE_H265_ENC_PICTURE_TYPE_IDR:
+      enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_I;
+      break;
+   case PIPE_H265_ENC_PICTURE_TYPE_P:
+      enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_P;
+      break;
+   case PIPE_H265_ENC_PICTURE_TYPE_SKIP:
+      enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_P_SKIP;
+      break;
+   case PIPE_H265_ENC_PICTURE_TYPE_B:
+      enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_B;
+      break;
+   default:
+      enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_I;
+   }
+
+   enc->enc_pic.enc_params.allowed_max_bitstream_size = enc->bs_size;
+   if (rscreen->info.chip_class < GFX9) {
+      enc->enc_pic.enc_params.input_pic_luma_pitch =
+         (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe);
+      enc->enc_pic.enc_params.input_pic_chroma_pitch =
+         (enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe);
+   }
+   else {
+      enc->enc_pic.enc_params.input_pic_luma_pitch =
+         enc->luma->u.gfx9.surf_pitch * enc->luma->bpe;
+      enc->enc_pic.enc_params.input_pic_chroma_pitch =
+         enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe;
+   }
+   enc->enc_pic.enc_params.input_pic_swizzle_mode =
+      RENC_UVD_SWIZZLE_MODE_LINEAR;
+
+   if (enc->enc_pic.enc_params.pic_type == RENC_UVD_PICTURE_TYPE_I)
+      enc->enc_pic.enc_params.reference_picture_index = 0xFFFFFFFF;
+   else
+      enc->enc_pic.enc_params.reference_picture_index =
+         (enc->enc_pic.frame_num - 1) % 2;
+
+   enc->enc_pic.enc_params.reconstructed_picture_index =
+      enc->enc_pic.frame_num % 2;
+
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_ENCODE_PARAMS);
+   RADEON_ENC_CS(enc->enc_pic.enc_params.pic_type);
+   RADEON_ENC_CS(enc->enc_pic.enc_params.allowed_max_bitstream_size);
+
+   /* Input luma and chroma planes, both read from enc->handle in VRAM. */
+   if (rscreen->info.chip_class < GFX9) {
+      RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+                      enc->luma->u.legacy.level[0].offset);
+      RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+                      enc->chroma->u.legacy.level[0].offset);
+   }
+   else {
+      RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+                      enc->luma->u.gfx9.surf_offset);
+      RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+                      enc->chroma->u.gfx9.surf_offset);
+   }
+   RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_luma_pitch);
+   RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_chroma_pitch);
+   RADEON_ENC_CS(0x00000000);   /* reserved */
+   RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_swizzle_mode);
+   RADEON_ENC_CS(enc->enc_pic.enc_params.reference_picture_index);
+   RADEON_ENC_CS(enc->enc_pic.enc_params.reconstructed_picture_index);
+   RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_op_init(struct radeon_uvd_encoder *enc)
+{ /* Emits a block that carries only the INITIALIZE operation code. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INITIALIZE);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_op_close(struct radeon_uvd_encoder *enc)
+{ /* Emits a block that carries only the CLOSE_SESSION operation code. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_CLOSE_SESSION);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_op_enc(struct radeon_uvd_encoder *enc)
+{ /* Emits a block that carries only the ENCODE operation code. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_ENCODE);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_op_init_rc(struct radeon_uvd_encoder *enc)
+{ /* Emits a block that carries only the INIT_RC operation code. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INIT_RC);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_op_init_rc_vbv(struct radeon_uvd_encoder *enc)
+{ /* Emits a block that carries only the INIT_RC_VBV_BUFFER_LEVEL
+   * operation code. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INIT_RC_VBV_BUFFER_LEVEL);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_op_speed(struct radeon_uvd_encoder *enc)
+{ /* Emits a block that carries only the SET_SPEED_ENCODING_MODE operation
+   * code. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_SET_SPEED_ENCODING_MODE);
+ RADEON_ENC_END();
+}
+
+static void
+begin(struct radeon_uvd_encoder *enc, struct pipe_picture_desc *pic)
+{ /* Builds the session start task: session info, task info, the
+   * INITIALIZE op, then the session, slice, codec, deblocking, layer and
+   * rate-control parameter blocks, closed by the two rate-control init
+   * ops. total_task_size is cleared before the task info is emitted and
+   * its final value is stored through enc->p_task_size, the slot that
+   * radeon_uvd_enc_task_info() reserved. */
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+ radeon_uvd_enc_op_init(enc);
+
+ radeon_uvd_enc_session_init_hevc(enc);
+ radeon_uvd_enc_slice_control_hevc(enc);
+ radeon_uvd_enc_spec_misc_hevc(enc, pic);
+ radeon_uvd_enc_deblocking_filter_hevc(enc, pic);
+
+ radeon_uvd_enc_layer_control(enc);
+ radeon_uvd_enc_rc_session_init(enc, pic);
+ radeon_uvd_enc_quality_params(enc);
+ radeon_uvd_enc_layer_select(enc); /* layer selected before the layer init ... */
+ radeon_uvd_enc_rc_layer_init(enc, pic);
+ radeon_uvd_enc_layer_select(enc); /* ... and again before the per-picture block */
+ radeon_uvd_enc_rc_per_pic(enc, pic);
+ radeon_uvd_enc_op_init_rc(enc);
+ radeon_uvd_enc_op_init_rc_vbv(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+
+static void
+encode(struct radeon_uvd_encoder *enc)
+{ /* Builds the task that encodes one picture: session info, task info,
+   * the AUD NAL unit, for IDR pictures also the VPS, PPS and SPS NAL units
+   * (emitted in that order), the slice header template, the encode
+   * parameters, the context/bitstream/feedback buffers, the intra-refresh
+   * block, and finally the speed-mode and ENCODE ops. The task size is
+   * stored through enc->p_task_size at the end. */
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+
+ radeon_uvd_enc_nalu_aud_hevc(enc);
+ if (enc->enc_pic.is_idr) {
+ radeon_uvd_enc_nalu_vps_hevc(enc);
+ radeon_uvd_enc_nalu_pps_hevc(enc);
+ radeon_uvd_enc_nalu_sps_hevc(enc);
+ }
+ radeon_uvd_enc_slice_header_hevc(enc);
+ radeon_uvd_enc_encode_params_hevc(enc);
+
+ radeon_uvd_enc_ctx(enc);
+ radeon_uvd_enc_bitstream(enc);
+ radeon_uvd_enc_feedback(enc);
+ radeon_uvd_enc_intra_refresh(enc);
+
+ radeon_uvd_enc_op_speed(enc);
+ radeon_uvd_enc_op_enc(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+
+static void
+destroy(struct radeon_uvd_encoder *enc)
+{ /* Builds the task that closes the session: session info, task info and
+   * the CLOSE_SESSION op; the task size is stored through
+   * enc->p_task_size at the end. */
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+ radeon_uvd_enc_op_close(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+
+void
+radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc)
+{ /* Installs this file's begin/encode/destroy implementations into the
+   * encoder's function pointers. */
+ enc->begin = begin;
+ enc->encode = encode;
+ enc->destroy = destroy;
+}
Mark Thompson
2018-02-08 22:13:23 UTC
Permalink
Post by James Zhu
Implement required IBs for UVD HEVC encode.
---
src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 1115 +++++++++++++++++++++++
1 file changed, 1115 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
new file mode 100644
index 0000000..17a39c2
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
@@ -0,0 +1,1115 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+#include "radeonsi/si_pipe.h"
+#include "radeon_video.h"
+#include "radeon_uvd_enc.h"
+
+#define RADEON_ENC_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value))
+#define RADEON_ENC_BEGIN(cmd) { \
+ uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
+RADEON_ENC_CS(cmd)
+#define RADEON_ENC_READ(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
+#define RADEON_ENC_WRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
+#define RADEON_ENC_READWRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
+#define RADEON_ENC_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; \
+ enc->total_task_size += *begin;}
+
+static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
This looks very suspicious in an H.265 file, because those are H.264 profile values...
Post by James Zhu
+static const unsigned index_to_shifts[4] = { 24, 16, 8, 0 };
+
...
+
+static void
+radeon_uvd_enc_session_init_hevc(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.session_init.aligned_picture_width =
+ align(enc->base.width, 64);
Do you really need to pad width to 64 rather than the MinCbSizeY?
Post by James Zhu
+ enc->enc_pic.session_init.aligned_picture_height =
+ align(enc->base.height, 16);
+ enc->enc_pic.session_init.padding_width =
+ enc->enc_pic.session_init.aligned_picture_width - enc->base.width;
+ enc->enc_pic.session_init.padding_height =
+ enc->enc_pic.session_init.aligned_picture_height - enc->base.height;
+ enc->enc_pic.session_init.pre_encode_mode = RENC_UVD_PREENCODE_MODE_NONE;
+ enc->enc_pic.session_init.pre_encode_chroma_enabled = false;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SESSION_INIT);
+ RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_width);
+ RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_height);
+ RADEON_ENC_CS(enc->enc_pic.session_init.padding_width);
+ RADEON_ENC_CS(enc->enc_pic.session_init.padding_height);
+ RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_mode);
+ RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_chroma_enabled);
+ RADEON_ENC_END();
+}
+
...
+
+static void
+radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_SPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ int i;
+
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4201, 16);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.layer_ctrl.
+ max_num_temporal_layers - 1, 3);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+ for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) {
+ for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ }
+
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.chroma_format_idc);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.session_init.aligned_picture_width);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.session_init.aligned_picture_height);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
Given that you've padded, conformance_window_flag should be set and the real size of the picture filled here.
Post by James Zhu
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_luma_minus8);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_chroma_minus8);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_max_poc - 4);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3);
+ //Only support CTBSize 64
+ radeon_uvd_enc_code_ue(enc,
+ 6 -
+ (enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3 + 3));
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.log2_min_transform_block_size_minus2);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.
+ log2_diff_max_min_transform_block_size);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.max_transform_hierarchy_depth_inter);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.max_transform_hierarchy_depth_intra);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ !enc->enc_pic.hevc_spec_misc.amp_disabled,
+ 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.
+ sample_adaptive_offset_enabled_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.pcm_enabled_flag, 1);
Missing more syntax elements which should be present if pcm_enabled_flag is set?
Post by James Zhu
+
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 0);
+ radeon_uvd_enc_code_ue(enc, 0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ strong_intra_smoothing_enabled, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
The missing timing information here is rather unfortunate. You should know at least the framerate?
Post by James Zhu
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_PPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4401, 16);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
Concatenating elements is confusing.
Post by James Zhu
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_se(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ constrained_intra_pred_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
QP does not vary within slices?
Post by James Zhu
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ loop_filter_across_slices_enabled, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ deblocking_filter_disabled, 1);
+
+ if (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled) {
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.beta_offset_div2);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.tc_offset_div2);
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_parallel_merge_level_minus2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_vps_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_VPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ int i;
+
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4001, 16);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x3, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.layer_ctrl.
+ max_num_temporal_layers - 1, 3);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xffff, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+ for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) {
+ for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ }
The PTL section is the same as the SPS, maybe move it to a separate function to avoid the duplication?
Post by James Zhu
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_aud_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_AUD);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 35, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ switch (enc->enc_pic.picture_type) {
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00, 3);
+ break;
+ radeon_uvd_enc_code_fixed_bits(enc, 0x01, 3);
+ break;
+ radeon_uvd_enc_code_fixed_bits(enc, 0x02, 3);
+ break;
+ radeon_uvd_enc_code_fixed_bits(enc, 0x02, 3);
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc)
+{
+ uint32_t instruction[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 };
+ uint32_t num_bits[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 };
+ unsigned int inst_index = 0;
+ unsigned int bit_index = 0;
+ unsigned int bits_copied = 0;
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SLICE_HEADER);
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.nal_unit_type, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3);
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_FIRST_SLICE;
+ inst_index++;
+
+ if ((enc->enc_pic.nal_unit_type >= 16)
+ && (enc->enc_pic.nal_unit_type <= 23))
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_ue(enc, 0x0);
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_SEGMENT;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_DEPENDENT_SLICE_END;
Does this instruction somehow remove everything after this point in a dependent slice? (You seem to still be writing the rest anyway.)
Post by James Zhu
+ inst_index++;
+
+ switch (enc->enc_pic.picture_type) {
+ radeon_uvd_enc_code_ue(enc, 0x2);
+ break;
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ break;
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ break;
+ radeon_uvd_enc_code_ue(enc, 0x1);
Does anything hit this default case? If so then it should probably be explicitly present, if not then not including it at all or assert()ing might be clearer.
Post by James Zhu
+ }
+
+ if ((enc->enc_pic.nal_unit_type != 19)
+ && (enc->enc_pic.nal_unit_type != 20)) {
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.frame_num %
+ enc->enc_pic.max_poc,
+ enc->enc_pic.log2_max_poc);
+ if (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ else {
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
No short-term references in the non-P case - this only supports I and P frames?
Post by James Zhu
+ }
+ }
+
Missing SAO flags (it was allowed in the SPS).
Post by James Zhu
+ if ((enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P) ||
+ (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_B)) {
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
Missing mvd_l1_zero_flag.
Post by James Zhu
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ cabac_init_flag, 1);
+ radeon_uvd_enc_code_ue(enc, 5 - enc->enc_pic.max_num_merge_cand);
+ }
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_QP_DELTA;
+ inst_index++;
+
+ if ((enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled) &&
+ (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled)) {
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ loop_filter_across_slices_enabled, 1);
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+ }
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_END;
+
+ for (int i = bit_index;
+ i < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++)
+ RADEON_ENC_CS(0x00000000);
+
+ for (int j = 0; j < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS;
+ j++) {
+ RADEON_ENC_CS(instruction[j]);
+ RADEON_ENC_CS(num_bits[j]);
+ }
+
+ RADEON_ENC_END();
+}
+
...
+
+static void
+begin(struct radeon_uvd_encoder *enc, struct pipe_picture_desc *pic)
+{
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+ radeon_uvd_enc_op_init(enc);
+
+ radeon_uvd_enc_session_init_hevc(enc);
+ radeon_uvd_enc_slice_control_hevc(enc);
+ radeon_uvd_enc_spec_misc_hevc(enc, pic);
+ radeon_uvd_enc_deblocking_filter_hevc(enc, pic);
+
+ radeon_uvd_enc_layer_control(enc);
+ radeon_uvd_enc_rc_session_init(enc, pic);
+ radeon_uvd_enc_quality_params(enc);
+ radeon_uvd_enc_layer_select(enc);
+ radeon_uvd_enc_rc_layer_init(enc, pic);
+ radeon_uvd_enc_layer_select(enc);
+ radeon_uvd_enc_rc_per_pic(enc, pic);
+ radeon_uvd_enc_op_init_rc(enc);
+ radeon_uvd_enc_op_init_rc_vbv(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+
+static void
+encode(struct radeon_uvd_encoder *enc)
+{
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+
+ radeon_uvd_enc_nalu_aud_hevc(enc);
Is it really appropriate to always generate AUDs?
Post by James Zhu
+ if (enc->enc_pic.is_idr) {
+ radeon_uvd_enc_nalu_vps_hevc(enc);
+ radeon_uvd_enc_nalu_pps_hevc(enc);
+ radeon_uvd_enc_nalu_sps_hevc(enc);
+ }
+ radeon_uvd_enc_slice_header_hevc(enc);
+ radeon_uvd_enc_encode_params_hevc(enc);
+
+ radeon_uvd_enc_ctx(enc);
+ radeon_uvd_enc_bitstream(enc);
+ radeon_uvd_enc_feedback(enc);
+ radeon_uvd_enc_intra_refresh(enc);
+
+ radeon_uvd_enc_op_speed(enc);
+ radeon_uvd_enc_op_enc(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+
+static void
+destroy(struct radeon_uvd_encoder *enc)
+{
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+ radeon_uvd_enc_op_close(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+
+void
+radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc)
+{
+ enc->begin = begin;
+ enc->encode = encode;
+ enc->destroy = destroy;
+}
Boyuan Zhang
2018-02-09 20:11:21 UTC
Permalink
Post by Mark Thompson
Post by James Zhu
Implement required IBs for UVD HEVC encode.
---
src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 1115 +++++++++++++++++++++++
1 file changed, 1115 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
new file mode 100644
index 0000000..17a39c2
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
@@ -0,0 +1,1115 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+#include "radeonsi/si_pipe.h"
+#include "radeon_video.h"
+#include "radeon_uvd_enc.h"
+
+#define RADEON_ENC_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value))
+#define RADEON_ENC_BEGIN(cmd) { \
+ uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
+RADEON_ENC_CS(cmd)
+#define RADEON_ENC_READ(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
+#define RADEON_ENC_WRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
+#define RADEON_ENC_READWRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
+#define RADEON_ENC_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; \
+ enc->total_task_size += *begin;}
+
+static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
This looks very suspicious in an H.265 file, because those are H.264 profile values...
It seems this line was copied from the VCN H.264 encoder. It is not used
anywhere and should be removed.
@James, can you remove this line please?
Post by Mark Thompson
Post by James Zhu
+static const unsigned index_to_shifts[4] = { 24, 16, 8, 0 };
+
...
+
+static void
+radeon_uvd_enc_session_init_hevc(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.session_init.aligned_picture_width =
+ align(enc->base.width, 64);
Do you really need to pad width to 64 rather than the MinCbSizeY?
Yes, this is based on the spec as well as a hardware requirement.
Post by Mark Thompson
Post by James Zhu
+ enc->enc_pic.session_init.aligned_picture_height =
+ align(enc->base.height, 16);
+ enc->enc_pic.session_init.padding_width =
+ enc->enc_pic.session_init.aligned_picture_width - enc->base.width;
+ enc->enc_pic.session_init.padding_height =
+ enc->enc_pic.session_init.aligned_picture_height - enc->base.height;
+ enc->enc_pic.session_init.pre_encode_mode = RENC_UVD_PREENCODE_MODE_NONE;
+ enc->enc_pic.session_init.pre_encode_chroma_enabled = false;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SESSION_INIT);
+ RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_width);
+ RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_height);
+ RADEON_ENC_CS(enc->enc_pic.session_init.padding_width);
+ RADEON_ENC_CS(enc->enc_pic.session_init.padding_height);
+ RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_mode);
+ RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_chroma_enabled);
+ RADEON_ENC_END();
+}
+
...
+
+static void
+radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_SPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ int i;
+
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4201, 16);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.layer_ctrl.
+ max_num_temporal_layers - 1, 3);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+ for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) {
+ for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ }
+
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.chroma_format_idc);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.session_init.aligned_picture_width);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.session_init.aligned_picture_height);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
Given that you've padded, conformance_window_flag should be set and the real size of the picture filled here.
 Agree, should fix it.
@James, can you use crop_left/right/top/bottom to determine
conformance_window_flag, and add the logic for those values please? Please
refer to the VCN H.264 logic.
Post by Mark Thompson
Post by James Zhu
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_luma_minus8);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_chroma_minus8);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_max_poc - 4);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3);
+ //Only support CTBSize 64
+ radeon_uvd_enc_code_ue(enc,
+ 6 -
+ (enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3 + 3));
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.log2_min_transform_block_size_minus2);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.
+ log2_diff_max_min_transform_block_size);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.max_transform_hierarchy_depth_inter);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.max_transform_hierarchy_depth_intra);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ !enc->enc_pic.hevc_spec_misc.amp_disabled,
+ 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.
+ sample_adaptive_offset_enabled_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.pcm_enabled_flag, 1);
Missing more syntax elements which should be present if pcm_enabled_flag is set?
Right. We should hardcode pcm_enabled_flag to 0, since we do not
currently support PCM.
@James, can you replace enc->enc_pic.pcm_enabled_flag with 0 please,
since we don't support it for now?
Post by Mark Thompson
Post by James Zhu
+
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 0);
+ radeon_uvd_enc_code_ue(enc, 0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ strong_intra_smoothing_enabled, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
The missing timing information here is rather unfortunate. You should know at least the framerate?
Yes, we are planning to add this feature in future patches.
Post by Mark Thompson
Post by James Zhu
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_PPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4401, 16);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
Concatenating elements is confusing.
Right, it seems a bit confusing here. What we really mean is:
output_flag_present_flag: 0  u(1)
num_extra_slice_header_bits: 0  u(3)
Post by Mark Thompson
Post by James Zhu
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_se(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ constrained_intra_pred_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
QP does not vary within slices?
We are working on CBR and VBR right now. So far only CQP is supported.
So cu_qp_delta_enabled case will be added in future patch.
Post by Mark Thompson
Post by James Zhu
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ loop_filter_across_slices_enabled, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ deblocking_filter_disabled, 1);
+
+ if (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled) {
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.beta_offset_div2);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.tc_offset_div2);
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_parallel_merge_level_minus2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_vps_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_VPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ int i;
+
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4001, 16);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x3, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.layer_ctrl.
+ max_num_temporal_layers - 1, 3);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xffff, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+ for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) {
+ for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ }
The PTL section is the same as the SPS, maybe move it to a separate function to avoid the duplication?
Agree, code optimization/re-organizing work is actually ongoing. But it
will be a separate patch since it will affect VCN encode as well.
Post by Mark Thompson
Post by James Zhu
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_aud_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_AUD);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 35, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ switch (enc->enc_pic.picture_type) {
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00, 3);
+ break;
+ radeon_uvd_enc_code_fixed_bits(enc, 0x01, 3);
+ break;
+ radeon_uvd_enc_code_fixed_bits(enc, 0x02, 3);
+ break;
+ radeon_uvd_enc_code_fixed_bits(enc, 0x02, 3);
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc)
+{
+ uint32_t instruction[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 };
+ uint32_t num_bits[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 };
+ unsigned int inst_index = 0;
+ unsigned int bit_index = 0;
+ unsigned int bits_copied = 0;
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SLICE_HEADER);
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.nal_unit_type, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3);
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_FIRST_SLICE;
+ inst_index++;
+
+ if ((enc->enc_pic.nal_unit_type >= 16)
+ && (enc->enc_pic.nal_unit_type <= 23))
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_ue(enc, 0x0);
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_SEGMENT;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_DEPENDENT_SLICE_END;
Does this instruction somehow remove everything after this point in a dependent slice? (You seem to still be writing the rest anyway.)
Yes, in dependent slice only.
Post by Mark Thompson
Post by James Zhu
+ inst_index++;
+
+ switch (enc->enc_pic.picture_type) {
+ radeon_uvd_enc_code_ue(enc, 0x2);
+ break;
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ break;
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ break;
+ radeon_uvd_enc_code_ue(enc, 0x1);
Does anything hit this default case? If so then it should probably be explicitly present, if not then not including it at all or assert()ing might be clearer.
It seems that would only happen if the data got corrupted, and yes, it is better to have an assertion here.
@James, can you change it accordingly please?
Post by Mark Thompson
Post by James Zhu
+ }
+
+ if ((enc->enc_pic.nal_unit_type != 19)
+ && (enc->enc_pic.nal_unit_type != 20)) {
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.frame_num %
+ enc->enc_pic.max_poc,
+ enc->enc_pic.log2_max_poc);
+ if (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ else {
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
No short-term references in the non-P case - this only supports I and P frames?
Yes, we only support I and P frames for now.
Post by Mark Thompson
Post by James Zhu
+ }
+ }
+
Missing SAO flags (it was allowed in the SPS).
Right, should hardcode SAO in SPS to 0 since we don't support it right now.
@James, can you hardcode SAO flag in SPS to 0 and add a comment there
please?
Post by Mark Thompson
Post by James Zhu
+ if ((enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P) ||
+ (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_B)) {
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
Missing mvd_l1_zero_flag.
Unfortunately we don't have B frame support now.
Post by Mark Thompson
Post by James Zhu
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ cabac_init_flag, 1);
+ radeon_uvd_enc_code_ue(enc, 5 - enc->enc_pic.max_num_merge_cand);
+ }
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_QP_DELTA;
+ inst_index++;
+
+ if ((enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled) &&
+ (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled)) {
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ loop_filter_across_slices_enabled, 1);
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+ }
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_END;
+
+ for (int i = bit_index;
+ i < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++)
+ RADEON_ENC_CS(0x00000000);
+
+ for (int j = 0; j < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS;
+ j++) {
+ RADEON_ENC_CS(instruction[j]);
+ RADEON_ENC_CS(num_bits[j]);
+ }
+
+ RADEON_ENC_END();
+}
+
...
+
+static void
+begin(struct radeon_uvd_encoder *enc, struct pipe_picture_desc *pic)
+{
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+ radeon_uvd_enc_op_init(enc);
+
+ radeon_uvd_enc_session_init_hevc(enc);
+ radeon_uvd_enc_slice_control_hevc(enc);
+ radeon_uvd_enc_spec_misc_hevc(enc, pic);
+ radeon_uvd_enc_deblocking_filter_hevc(enc, pic);
+
+ radeon_uvd_enc_layer_control(enc);
+ radeon_uvd_enc_rc_session_init(enc, pic);
+ radeon_uvd_enc_quality_params(enc);
+ radeon_uvd_enc_layer_select(enc);
+ radeon_uvd_enc_rc_layer_init(enc, pic);
+ radeon_uvd_enc_layer_select(enc);
+ radeon_uvd_enc_rc_per_pic(enc, pic);
+ radeon_uvd_enc_op_init_rc(enc);
+ radeon_uvd_enc_op_init_rc_vbv(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+
+static void
+encode(struct radeon_uvd_encoder *enc)
+{
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+
+ radeon_uvd_enc_nalu_aud_hevc(enc);
Is it really appropriate to always generate AUDs?
Seems fine based on spec. Actually it should be requested by the
applications based on their purposes. Do you have any suggestions here?
Post by Mark Thompson
Post by James Zhu
+ if (enc->enc_pic.is_idr) {
+ radeon_uvd_enc_nalu_vps_hevc(enc);
+ radeon_uvd_enc_nalu_pps_hevc(enc);
+ radeon_uvd_enc_nalu_sps_hevc(enc);
+ }
+ radeon_uvd_enc_slice_header_hevc(enc);
+ radeon_uvd_enc_encode_params_hevc(enc);
+
+ radeon_uvd_enc_ctx(enc);
+ radeon_uvd_enc_bitstream(enc);
+ radeon_uvd_enc_feedback(enc);
+ radeon_uvd_enc_intra_refresh(enc);
+
+ radeon_uvd_enc_op_speed(enc);
+ radeon_uvd_enc_op_enc(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+
+static void
+destroy(struct radeon_uvd_encoder *enc)
+{
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+ radeon_uvd_enc_op_close(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+
+void
+radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc)
+{
+ enc->begin = begin;
+ enc->encode = encode;
+ enc->destroy = destroy;
+}
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
James Zhu
2018-02-12 15:16:52 UTC
Permalink
Hi Mark,

Thanks for pointing them out. [PATCH v4 3/8] / [PATCH v4 4/8] / [PATCH v4
5/8] are updated accordingly.

James.
Post by Mark Thompson
Post by James Zhu
Implement required IBs for UVD HEVC encode.
---
src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 1115 +++++++++++++++++++++++
1 file changed, 1115 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
new file mode 100644
index 0000000..17a39c2
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
@@ -0,0 +1,1115 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+#include "radeonsi/si_pipe.h"
+#include "radeon_video.h"
+#include "radeon_uvd_enc.h"
+
+#define RADEON_ENC_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value))
+#define RADEON_ENC_BEGIN(cmd) { \
+ uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
+RADEON_ENC_CS(cmd)
+#define RADEON_ENC_READ(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
+#define RADEON_ENC_WRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
+#define RADEON_ENC_READWRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
+#define RADEON_ENC_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; \
+ enc->total_task_size += *begin;}
+
+static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
This looks very suspicious in an H.265 file, because those are H.264 profile values...
Post by James Zhu
+static const unsigned index_to_shifts[4] = { 24, 16, 8, 0 };
+
...
+
+static void
+radeon_uvd_enc_session_init_hevc(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.session_init.aligned_picture_width =
+ align(enc->base.width, 64);
Do you really need to pad width to 64 rather than the MinCbSizeY?
Post by James Zhu
+ enc->enc_pic.session_init.aligned_picture_height =
+ align(enc->base.height, 16);
+ enc->enc_pic.session_init.padding_width =
+ enc->enc_pic.session_init.aligned_picture_width - enc->base.width;
+ enc->enc_pic.session_init.padding_height =
+ enc->enc_pic.session_init.aligned_picture_height - enc->base.height;
+ enc->enc_pic.session_init.pre_encode_mode = RENC_UVD_PREENCODE_MODE_NONE;
+ enc->enc_pic.session_init.pre_encode_chroma_enabled = false;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SESSION_INIT);
+ RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_width);
+ RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_height);
+ RADEON_ENC_CS(enc->enc_pic.session_init.padding_width);
+ RADEON_ENC_CS(enc->enc_pic.session_init.padding_height);
+ RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_mode);
+ RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_chroma_enabled);
+ RADEON_ENC_END();
+}
+
...
+
+static void
+radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_SPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ int i;
+
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4201, 16);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.layer_ctrl.
+ max_num_temporal_layers - 1, 3);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+ for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) {
+ for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ }
+
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.chroma_format_idc);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.session_init.aligned_picture_width);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.session_init.aligned_picture_height);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
Given that you've padded, conformance_window_flag should be set and the real size of the picture filled here.
Post by James Zhu
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_luma_minus8);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_chroma_minus8);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_max_poc - 4);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3);
+ //Only support CTBSize 64
+ radeon_uvd_enc_code_ue(enc,
+ 6 -
+ (enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3 + 3));
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.log2_min_transform_block_size_minus2);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.
+ log2_diff_max_min_transform_block_size);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.max_transform_hierarchy_depth_inter);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.max_transform_hierarchy_depth_intra);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ !enc->enc_pic.hevc_spec_misc.amp_disabled,
+ 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.
+ sample_adaptive_offset_enabled_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.pcm_enabled_flag, 1);
Missing more syntax elements which should be present if pcm_enabled_flag is set?
Post by James Zhu
+
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 0);
+ radeon_uvd_enc_code_ue(enc, 0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ strong_intra_smoothing_enabled, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
The missing timing information here is rather unfortunate. You should know at least the framerate?
Post by James Zhu
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_PPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4401, 16);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
Concatenating elements is confusing.
Post by James Zhu
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_se(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ constrained_intra_pred_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
QP does not vary within slices?
Post by James Zhu
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ loop_filter_across_slices_enabled, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ deblocking_filter_disabled, 1);
+
+ if (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled) {
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.beta_offset_div2);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.tc_offset_div2);
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_parallel_merge_level_minus2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_vps_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_VPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ int i;
+
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4001, 16);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x3, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.layer_ctrl.
+ max_num_temporal_layers - 1, 3);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xffff, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+ for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) {
+ for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ }
The PTL section is the same as the SPS, maybe move it to a separate function to avoid the duplication?
Post by James Zhu
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_aud_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_AUD);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 35, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ switch (enc->enc_pic.picture_type) {
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00, 3);
+ break;
+ radeon_uvd_enc_code_fixed_bits(enc, 0x01, 3);
+ break;
+ radeon_uvd_enc_code_fixed_bits(enc, 0x02, 3);
+ break;
+ radeon_uvd_enc_code_fixed_bits(enc, 0x02, 3);
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc)
+{
+ uint32_t instruction[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 };
+ uint32_t num_bits[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 };
+ unsigned int inst_index = 0;
+ unsigned int bit_index = 0;
+ unsigned int bits_copied = 0;
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SLICE_HEADER);
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.nal_unit_type, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3);
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_FIRST_SLICE;
+ inst_index++;
+
+ if ((enc->enc_pic.nal_unit_type >= 16)
+ && (enc->enc_pic.nal_unit_type <= 23))
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_ue(enc, 0x0);
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_SEGMENT;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_DEPENDENT_SLICE_END;
Does this instruction somehow remove everything after this point in a dependent slice? (You seem to still be writing the rest anyway.)
Post by James Zhu
+ inst_index++;
+
+ switch (enc->enc_pic.picture_type) {
+ radeon_uvd_enc_code_ue(enc, 0x2);
+ break;
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ break;
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ break;
+ radeon_uvd_enc_code_ue(enc, 0x1);
Does anything hit this default case? If so then it should probably be explicitly present, if not then not including it at all or assert()ing might be clearer.
Post by James Zhu
+ }
+
+ if ((enc->enc_pic.nal_unit_type != 19)
+ && (enc->enc_pic.nal_unit_type != 20)) {
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.frame_num %
+ enc->enc_pic.max_poc,
+ enc->enc_pic.log2_max_poc);
+ if (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ else {
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
No short-term references in the non-P case - this only supports I and P frames?
Post by James Zhu
+ }
+ }
+
Missing SAO flags (it was allowed in the SPS).
Post by James Zhu
+ if ((enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P) ||
+ (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_B)) {
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
Missing mvd_l1_zero_flag.
Post by James Zhu
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ cabac_init_flag, 1);
+ radeon_uvd_enc_code_ue(enc, 5 - enc->enc_pic.max_num_merge_cand);
+ }
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_QP_DELTA;
+ inst_index++;
+
+ if ((enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled) &&
+ (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled)) {
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ loop_filter_across_slices_enabled, 1);
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+ }
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_END;
+
+ for (int i = bit_index;
+ i < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++)
+ RADEON_ENC_CS(0x00000000);
+
+ for (int j = 0; j < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS;
+ j++) {
+ RADEON_ENC_CS(instruction[j]);
+ RADEON_ENC_CS(num_bits[j]);
+ }
+
+ RADEON_ENC_END();
+}
+
...
+
+static void
+begin(struct radeon_uvd_encoder *enc, struct pipe_picture_desc *pic)
+{
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+ radeon_uvd_enc_op_init(enc);
+
+ radeon_uvd_enc_session_init_hevc(enc);
+ radeon_uvd_enc_slice_control_hevc(enc);
+ radeon_uvd_enc_spec_misc_hevc(enc, pic);
+ radeon_uvd_enc_deblocking_filter_hevc(enc, pic);
+
+ radeon_uvd_enc_layer_control(enc);
+ radeon_uvd_enc_rc_session_init(enc, pic);
+ radeon_uvd_enc_quality_params(enc);
+ radeon_uvd_enc_layer_select(enc);
+ radeon_uvd_enc_rc_layer_init(enc, pic);
+ radeon_uvd_enc_layer_select(enc);
+ radeon_uvd_enc_rc_per_pic(enc, pic);
+ radeon_uvd_enc_op_init_rc(enc);
+ radeon_uvd_enc_op_init_rc_vbv(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+
+static void
+encode(struct radeon_uvd_encoder *enc)
+{
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+
+ radeon_uvd_enc_nalu_aud_hevc(enc);
Is it really appropriate to always generate AUDs?
Post by James Zhu
+ if (enc->enc_pic.is_idr) {
+ radeon_uvd_enc_nalu_vps_hevc(enc);
+ radeon_uvd_enc_nalu_pps_hevc(enc);
+ radeon_uvd_enc_nalu_sps_hevc(enc);
+ }
+ radeon_uvd_enc_slice_header_hevc(enc);
+ radeon_uvd_enc_encode_params_hevc(enc);
+
+ radeon_uvd_enc_ctx(enc);
+ radeon_uvd_enc_bitstream(enc);
+ radeon_uvd_enc_feedback(enc);
+ radeon_uvd_enc_intra_refresh(enc);
+
+ radeon_uvd_enc_op_speed(enc);
+ radeon_uvd_enc_op_enc(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+
+static void
+destroy(struct radeon_uvd_encoder *enc)
+{
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+ radeon_uvd_enc_op_close(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+
+void
+radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc)
+{
+ enc->begin = begin;
+ enc->encode = encode;
+ enc->destroy = destroy;
+}
James Zhu
2018-02-09 20:35:05 UTC
Permalink
Implement required IBs for UVD HEVC encode.

Signed-off-by: James Zhu <***@amd.com>
---
src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 1115 +++++++++++++++++++++++
1 file changed, 1115 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c

diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
new file mode 100644
index 0000000..2b8156e
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
@@ -0,0 +1,1115 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+#include "radeonsi/si_pipe.h"
+#include "radeon_video.h"
+#include "radeon_uvd_enc.h"
+
+/* All RADEON_ENC_* macros expect a variable named `enc` (the encoder) to be
+ * in scope at the point of use. */
+
+/* Append one dword to the command stream and advance the write index. */
+#define RADEON_ENC_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value))
+/* Open a package: starts a brace scope, reserves one dword (pointed to by
+ * `begin`) for the package size, then writes the command id. The scope is
+ * only closed by RADEON_ENC_END(), so the two must be used as a pair inside
+ * the same function. */
+#define RADEON_ENC_BEGIN(cmd) { \
+ uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
+RADEON_ENC_CS(cmd)
+/* Reference a buffer with the given usage and emit its address (plus `off`)
+ * through radeon_uvd_enc_add_buffer(). */
+#define RADEON_ENC_READ(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
+#define RADEON_ENC_WRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
+#define RADEON_ENC_READWRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
+/* Close a package: store its size (dwords written since `begin`, including
+ * the size dword itself, times 4) into the reserved dword, add that size to
+ * enc->total_task_size, and close the scope opened by RADEON_ENC_BEGIN(). */
+#define RADEON_ENC_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; \
+ enc->total_task_size += *begin;}
+
+/* Left-shift applied to the byte at position 0..3 within a dword by
+ * radeon_uvd_enc_output_one_byte(): the first byte lands in bits 31..24. */
+static const unsigned index_to_shifts[4] = { 24, 16, 8, 0 };
+
+/* Register `buf` with the command stream for the requested usage (always
+ * OR-ed with RADEON_USAGE_SYNCHRONIZED) and emit its virtual address plus
+ * `offset` as two dwords, upper 32 bits first. */
+static void
+radeon_uvd_enc_add_buffer(struct radeon_uvd_encoder *enc,
+                          struct pb_buffer *buf, enum radeon_bo_usage usage,
+                          enum radeon_bo_domain domain, signed offset)
+{
+   uint64_t gpu_va;
+
+   enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
+                          domain, RADEON_PRIO_VCE);
+
+   gpu_va = enc->ws->buffer_get_virtual_address(buf);
+   gpu_va += offset;
+
+   RADEON_ENC_CS(gpu_va >> 32);
+   RADEON_ENC_CS(gpu_va);
+}
+
+/* Turn emulation-prevention handling on or off for the bytes written next.
+ * The zero-byte run counter is cleared only when the mode actually changes;
+ * requesting the mode that is already active does nothing. */
+static void
+radeon_uvd_enc_set_emulation_prevention(struct radeon_uvd_encoder *enc,
+                                        bool set)
+{
+   if (enc->emulation_prevention == set)
+      return;
+
+   enc->emulation_prevention = set;
+   enc->num_zeros = 0;
+}
+
+/* Store one byte into the command-stream dword currently being filled.
+ * Bytes are placed from the most significant position downwards (see
+ * index_to_shifts); the dword is zeroed before its first byte is stored and
+ * the stream index advances once four bytes have been written. */
+static void
+radeon_uvd_enc_output_one_byte(struct radeon_uvd_encoder *enc,
+                               unsigned char byte)
+{
+   uint32_t *dword = &enc->cs->current.buf[enc->cs->current.cdw];
+
+   /* first byte of a fresh dword: drop whatever was there before */
+   if (enc->byte_index == 0)
+      *dword = 0;
+
+   *dword |= (unsigned int) byte << index_to_shifts[enc->byte_index];
+
+   if (++enc->byte_index >= 4) {
+      enc->byte_index = 0;
+      enc->cs->current.cdw++;
+   }
+}
+
+/* Called with every byte just before it is written. While emulation
+ * prevention is enabled, an extra 0x03 byte is written (and counted in
+ * bits_output) whenever at least two zero bytes have been seen and the next
+ * byte is 0x00, 0x01 or 0x03. The zero-run counter is then updated for
+ * `byte`. Does nothing while emulation prevention is disabled. */
+static void
+radeon_uvd_enc_emulation_prevention(struct radeon_uvd_encoder *enc,
+                                    unsigned char byte)
+{
+   if (!enc->emulation_prevention)
+      return;
+
+   const bool needs_escape = byte == 0x00 || byte == 0x01 || byte == 0x03;
+
+   if (enc->num_zeros >= 2 && needs_escape) {
+      radeon_uvd_enc_output_one_byte(enc, 0x03);
+      enc->bits_output += 8;
+      enc->num_zeros = 0;
+   }
+
+   if (byte == 0)
+      enc->num_zeros++;
+   else
+      enc->num_zeros = 0;
+}
+
+/* Append the low `num_bits` bits of `value` to the header bitstream, most
+ * significant bit first. Bits are collected in the 32-bit shifter (filled
+ * from the top); every complete byte is passed through emulation prevention
+ * and written out immediately, so fewer than 8 bits remain pending on
+ * return. num_bits == 0 is a no-op; the mask computation below is only valid
+ * for num_bits <= 32. */
+static void
+radeon_uvd_enc_code_fixed_bits(struct radeon_uvd_encoder *enc,
+                               unsigned int value, unsigned int num_bits)
+{
+   while (num_bits > 0) {
+      const unsigned int room = 32 - enc->bits_in_shifter;
+      const unsigned int take = num_bits > room ? room : num_bits;
+      unsigned int chunk = value & (0xffffffff >> (32 - num_bits));
+
+      /* not everything fits: keep the topmost `take` of the pending bits */
+      if (take < num_bits)
+         chunk >>= num_bits - take;
+
+      enc->shifter |= chunk << (room - take);
+      enc->bits_in_shifter += take;
+      num_bits -= take;
+
+      /* drain all complete bytes from the top of the shifter */
+      while (enc->bits_in_shifter >= 8) {
+         unsigned char top = (unsigned char) (enc->shifter >> 24);
+
+         enc->shifter <<= 8;
+         radeon_uvd_enc_emulation_prevention(enc, top);
+         radeon_uvd_enc_output_one_byte(enc, top);
+         enc->bits_output += 8;
+         enc->bits_in_shifter -= 8;
+      }
+   }
+}
+
+/* Put the header bit writer back into its initial state: empty shifter, no
+ * bits counted, byte position 0 within the current dword, and emulation
+ * prevention switched off. */
+static void
+radeon_uvd_enc_reset(struct radeon_uvd_encoder *enc)
+{
+   /* bit accumulator */
+   enc->shifter = 0;
+   enc->bits_in_shifter = 0;
+   enc->bits_output = 0;
+
+   /* byte-level state */
+   enc->byte_index = 0;
+   enc->num_zeros = 0;
+   enc->emulation_prevention = false;
+}
+
+/* Pad the bitstream with zero bits up to the next byte boundary. Nothing is
+ * written when the stream is already byte aligned. */
+static void
+radeon_uvd_enc_byte_align(struct radeon_uvd_encoder *enc)
+{
+   const unsigned int pad_bits = (32 - enc->bits_in_shifter) % 8;
+
+   if (pad_bits == 0)
+      return;
+
+   radeon_uvd_enc_code_fixed_bits(enc, 0, pad_bits);
+}
+
+/* Write out whatever is still pending in the bit writer. A partially filled
+ * shifter is emitted as one final byte (only the pending bits are added to
+ * bits_output), after which the shifter and the zero-run counter are
+ * cleared. If the current command-stream dword is partially filled, the
+ * stream index is advanced past it so the next write starts a new dword. */
+static void
+radeon_uvd_enc_flush_headers(struct radeon_uvd_encoder *enc)
+{
+   if (enc->bits_in_shifter != 0) {
+      unsigned char tail = (unsigned char) (enc->shifter >> 24);
+
+      radeon_uvd_enc_emulation_prevention(enc, tail);
+      radeon_uvd_enc_output_one_byte(enc, tail);
+
+      enc->bits_output += enc->bits_in_shifter;
+      enc->bits_in_shifter = 0;
+      enc->shifter = 0;
+      enc->num_zeros = 0;
+   }
+
+   if (enc->byte_index > 0) {
+      enc->byte_index = 0;
+      enc->cs->current.cdw++;
+   }
+}
+
+/* Write `value` as an unsigned Exp-Golomb code: N zero bits followed by the
+ * (N + 1)-bit binary representation of value + 1, where N is
+ * floor(log2(value + 1)).
+ *
+ * The prefix and the info bits are written as two separate fields so that
+ * neither exceeds 32 bits. Writing them as a single (2N + 1)-bit field, as
+ * this function used to, asks radeon_uvd_enc_code_fixed_bits() for more than
+ * 32 bits once value >= 0xFFFF, which makes its mask shift undefined. The
+ * resulting bitstream is identical for all smaller values.
+ *
+ * `value` must be below UINT_MAX; value + 1 would otherwise wrap to 0. */
+static void
+radeon_uvd_enc_code_ue(struct radeon_uvd_encoder *enc, unsigned int value)
+{
+   const unsigned int code_num = value + 1;
+   unsigned int prefix_len = 0;
+
+   /* prefix_len = number of bits of code_num below its leading one */
+   for (unsigned int rest = code_num >> 1; rest != 0; rest >>= 1)
+      prefix_len++;
+
+   /* a zero-length prefix is a no-op in code_fixed_bits */
+   radeon_uvd_enc_code_fixed_bits(enc, 0, prefix_len);
+   radeon_uvd_enc_code_fixed_bits(enc, code_num, prefix_len + 1);
+}
+
+/* Write `value` as a signed Exp-Golomb code by mapping it to an unsigned
+ * code number and handing that to radeon_uvd_enc_code_ue():
+ * 0 -> 0, positive v -> 2v - 1, negative v -> -2v. */
+static void
+radeon_uvd_enc_code_se(struct radeon_uvd_encoder *enc, int value)
+{
+   unsigned int mapped;
+
+   if (value == 0)
+      mapped = 0;
+   else if (value < 0)
+      mapped = (unsigned int) (0 - value) << 1;
+   else
+      mapped = ((unsigned int) (value) << 1) - 1;
+
+   radeon_uvd_enc_code_ue(enc, mapped);
+}
+
+/* Emit the SESSION_INFO package: a reserved zero dword, the firmware
+ * interface version (major and minor packed with their respective shifts),
+ * and the address of the session-info buffer enc->si, referenced for read
+ * and write. */
+static void
+radeon_uvd_enc_session_info(struct radeon_uvd_encoder *enc)
+{
+ unsigned int interface_version =
+ ((RENC_UVD_FW_INTERFACE_MAJOR_VERSION <<
+ RENC_UVD_IF_MAJOR_VERSION_SHIFT) |
+ (RENC_UVD_FW_INTERFACE_MINOR_VERSION <<
+ RENC_UVD_IF_MINOR_VERSION_SHIFT));
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SESSION_INFO);
+ RADEON_ENC_CS(0x00000000); // reserved
+ RADEON_ENC_CS(interface_version);
+ RADEON_ENC_READWRITE(enc->si->res->buf, enc->si->res->domains, 0x0);
+ RADEON_ENC_END();
+}
+
+/* Emit the TASK_INFO package. Each call increments the task id. The dword
+ * following the command id is reserved and its address remembered in
+ * enc->p_task_size; the callers store the total task size there once all
+ * packages of the task have been emitted. `need_feedback` selects whether
+ * one feedback or none is allowed for the task. */
+static void
+radeon_uvd_enc_task_info(struct radeon_uvd_encoder *enc, bool need_feedback)
+{
+ enc->enc_pic.task_info.task_id++;
+
+ if (need_feedback)
+ enc->enc_pic.task_info.allowed_max_num_feedbacks = 1;
+ else
+ enc->enc_pic.task_info.allowed_max_num_feedbacks = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_TASK_INFO);
+ /* placeholder for the task size, filled in by the caller */
+ enc->p_task_size = &enc->cs->current.buf[enc->cs->current.cdw++];
+ RADEON_ENC_CS(enc->enc_pic.task_info.task_id);
+ RADEON_ENC_CS(enc->enc_pic.task_info.allowed_max_num_feedbacks);
+ RADEON_ENC_END();
+}
+
+/* Compute the aligned picture size and emit the SESSION_INIT package.
+ * Width is aligned to 64 and height to 16; the padding fields hold the
+ * difference between the aligned and the original dimensions. Pre-encode is
+ * disabled. */
+static void
+radeon_uvd_enc_session_init_hevc(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.session_init.aligned_picture_width =
+ align(enc->base.width, 64);
+ enc->enc_pic.session_init.aligned_picture_height =
+ align(enc->base.height, 16);
+ enc->enc_pic.session_init.padding_width =
+ enc->enc_pic.session_init.aligned_picture_width - enc->base.width;
+ enc->enc_pic.session_init.padding_height =
+ enc->enc_pic.session_init.aligned_picture_height - enc->base.height;
+ enc->enc_pic.session_init.pre_encode_mode = RENC_UVD_PREENCODE_MODE_NONE;
+ enc->enc_pic.session_init.pre_encode_chroma_enabled = false;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SESSION_INIT);
+ RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_width);
+ RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_height);
+ RADEON_ENC_CS(enc->enc_pic.session_init.padding_width);
+ RADEON_ENC_CS(enc->enc_pic.session_init.padding_height);
+ RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_mode);
+ RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_chroma_enabled);
+ RADEON_ENC_END();
+}
+
+/* Emit the LAYER_CONTROL package. Both the maximum and the actual number of
+ * temporal layers are fixed to 1 here. */
+static void
+radeon_uvd_enc_layer_control(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.layer_ctrl.max_num_temporal_layers = 1;
+ enc->enc_pic.layer_ctrl.num_temporal_layers = 1;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_LAYER_CONTROL);
+ RADEON_ENC_CS(enc->enc_pic.layer_ctrl.max_num_temporal_layers);
+ RADEON_ENC_CS(enc->enc_pic.layer_ctrl.num_temporal_layers);
+ RADEON_ENC_END();
+}
+
+/* Emit the LAYER_SELECT package; temporal layer index 0 is always
+ * selected. */
+static void
+radeon_uvd_enc_layer_select(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.layer_sel.temporal_layer_index = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_LAYER_SELECT);
+ RADEON_ENC_CS(enc->enc_pic.layer_sel.temporal_layer_index);
+ RADEON_ENC_END();
+}
+
+/* Emit the SLICE_CONTROL package in fixed-CTBs mode. The CTB count per
+ * slice is computed from the picture size with both dimensions aligned to
+ * 64, and the same value is used per slice segment. */
+static void
+radeon_uvd_enc_slice_control_hevc(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.hevc_slice_ctrl.slice_control_mode =
+ RENC_UVD_SLICE_CONTROL_MODE_FIXED_CTBS;
+ /* Evaluated left to right as ((aligned_w / 64) * aligned_h) / 64; this
+ * equals (aligned_w / 64) * (aligned_h / 64) assuming align() returns a
+ * multiple of 64. */
+ enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice =
+ align(enc->base.width, 64) / 64 * align(enc->base.height, 64) / 64;
+ enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.
+ num_ctbs_per_slice_segment =
+ enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SLICE_CONTROL);
+ RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.slice_control_mode);
+ RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.
+ num_ctbs_per_slice);
+ RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.
+ num_ctbs_per_slice_segment);
+ RADEON_ENC_END();
+}
+
+/* Copy the codec-specific settings from the picture description into
+ * enc_pic.hevc_spec_misc and emit the SPEC_MISC package. `picture` is
+ * treated as a struct pipe_h265_enc_picture_desc. Note that the hardware
+ * field is "amp_disabled", i.e. the inverse of the sequence's
+ * amp_enabled_flag. Half-pel and quarter-pel are always enabled. */
+static void
+radeon_uvd_enc_spec_misc_hevc(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture;
+ enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3 =
+ pic->seq.log2_min_luma_coding_block_size_minus3;
+ enc->enc_pic.hevc_spec_misc.amp_disabled = !pic->seq.amp_enabled_flag;
+ enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled =
+ pic->seq.strong_intra_smoothing_enabled_flag;
+ enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag =
+ pic->pic.constrained_intra_pred_flag;
+ enc->enc_pic.hevc_spec_misc.cabac_init_flag = pic->slice.cabac_init_flag;
+ enc->enc_pic.hevc_spec_misc.half_pel_enabled = 1;
+ enc->enc_pic.hevc_spec_misc.quarter_pel_enabled = 1;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SPEC_MISC);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.amp_disabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.cabac_init_flag);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.half_pel_enabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.quarter_pel_enabled);
+ RADEON_ENC_END();
+}
+
+/* Translate the pipe-level rate-control method into the firmware's method
+ * and emit the RATE_CONTROL_SESSION_INIT package together with the VBV
+ * buffer level. `picture` is treated as a struct
+ * pipe_h265_enc_picture_desc.
+ *
+ * Mapping: DISABLE -> NONE; CONSTANT and CONSTANT_SKIP -> CBR; VARIABLE and
+ * VARIABLE_SKIP -> PEAK_CONSTRAINED_VBR; any other value -> NONE. */
+static void
+radeon_uvd_enc_rc_session_init(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture;
+ enc->enc_pic.rc_session_init.vbv_buffer_level = pic->rc.vbv_buf_lv;
+ switch (pic->rc.rate_ctrl_method) {
+ case PIPE_H265_ENC_RATE_CONTROL_METHOD_DISABLE:
+ enc->enc_pic.rc_session_init.rate_control_method =
+ RENC_UVD_RATE_CONTROL_METHOD_NONE;
+ break;
+ case PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT_SKIP:
+ case PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT:
+ enc->enc_pic.rc_session_init.rate_control_method =
+ RENC_UVD_RATE_CONTROL_METHOD_CBR;
+ break;
+ case PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE_SKIP:
+ case PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE:
+ enc->enc_pic.rc_session_init.rate_control_method =
+ RENC_UVD_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR;
+ break;
+ default:
+ enc->enc_pic.rc_session_init.rate_control_method =
+ RENC_UVD_RATE_CONTROL_METHOD_NONE;
+ }
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_RATE_CONTROL_SESSION_INIT);
+ RADEON_ENC_CS(enc->enc_pic.rc_session_init.rate_control_method);
+ RADEON_ENC_CS(enc->enc_pic.rc_session_init.vbv_buffer_level);
+ RADEON_ENC_END();
+}
+
+/* Copy the per-layer rate-control parameters (bit rates, frame rate, VBV
+ * buffer size, target and peak bits per picture) from the picture
+ * description and emit the RATE_CONTROL_LAYER_INIT package. `picture` is
+ * treated as a struct pipe_h265_enc_picture_desc. */
+static void
+radeon_uvd_enc_rc_layer_init(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture;
+ enc->enc_pic.rc_layer_init.target_bit_rate = pic->rc.target_bitrate;
+ enc->enc_pic.rc_layer_init.peak_bit_rate = pic->rc.peak_bitrate;
+ enc->enc_pic.rc_layer_init.frame_rate_num = pic->rc.frame_rate_num;
+ enc->enc_pic.rc_layer_init.frame_rate_den = pic->rc.frame_rate_den;
+ enc->enc_pic.rc_layer_init.vbv_buffer_size = pic->rc.vbv_buffer_size;
+ enc->enc_pic.rc_layer_init.avg_target_bits_per_picture =
+ pic->rc.target_bits_picture;
+ enc->enc_pic.rc_layer_init.peak_bits_per_picture_integer =
+ pic->rc.peak_bits_picture_integer;
+ enc->enc_pic.rc_layer_init.peak_bits_per_picture_fractional =
+ pic->rc.peak_bits_picture_fraction;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_RATE_CONTROL_LAYER_INIT);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.target_bit_rate);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bit_rate);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.frame_rate_num);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.frame_rate_den);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.vbv_buffer_size);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.avg_target_bits_per_picture);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bits_per_picture_integer);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bits_per_picture_fractional);
+ RADEON_ENC_END();
+}
+
+/* Copy the slice-level deblocking settings and the Cb/Cr QP offsets from
+ * the picture description into enc_pic.hevc_deblock and emit the
+ * DEBLOCKING_FILTER package. `picture` is treated as a struct
+ * pipe_h265_enc_picture_desc. The values stored here are also read later by
+ * the PPS and slice-header writers. */
+static void
+radeon_uvd_enc_deblocking_filter_hevc(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture;
+ enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled =
+ pic->slice.slice_loop_filter_across_slices_enabled_flag;
+ enc->enc_pic.hevc_deblock.deblocking_filter_disabled =
+ pic->slice.slice_deblocking_filter_disabled_flag;
+ enc->enc_pic.hevc_deblock.beta_offset_div2 =
+ pic->slice.slice_beta_offset_div2;
+ enc->enc_pic.hevc_deblock.tc_offset_div2 = pic->slice.slice_tc_offset_div2;
+ enc->enc_pic.hevc_deblock.cb_qp_offset = pic->slice.slice_cb_qp_offset;
+ enc->enc_pic.hevc_deblock.cr_qp_offset = pic->slice.slice_cr_qp_offset;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_DEBLOCKING_FILTER);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.deblocking_filter_disabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.beta_offset_div2);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.tc_offset_div2);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cb_qp_offset);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cr_qp_offset);
+ RADEON_ENC_END();
+}
+
+/* Emit the QUALITY_PARAMS package with all three fields (vbaq mode, scene
+ * change sensitivity, scene change minimum IDR interval) set to 0. */
+static void
+radeon_uvd_enc_quality_params(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.quality_params.vbaq_mode = 0;
+ enc->enc_pic.quality_params.scene_change_sensitivity = 0;
+ enc->enc_pic.quality_params.scene_change_min_idr_interval = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_QUALITY_PARAMS);
+ RADEON_ENC_CS(enc->enc_pic.quality_params.vbaq_mode);
+ RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_sensitivity);
+ RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_min_idr_interval);
+ RADEON_ENC_END();
+}
+
+/* Emit an INSERT_NALU_BUFFER package of type SPS.
+ *
+ * Package layout: NALU type, a size dword (filled in at the end), then the
+ * header bytes produced by the bit writer. The header starts with the
+ * 32-bit start code 0x00000001 and the 16-bit value 0x4201, both written
+ * with emulation prevention off; the remaining fields are written with
+ * emulation prevention on. The field order below is the bitstream syntax
+ * and must not be changed. Values come from enc_pic (layer_ctrl,
+ * session_init, hevc_spec_misc and the top-level profile/level/bit-depth
+ * fields), so the functions filling those must have run before this one. */
+static void
+radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_SPS);
+ /* reserved dword, receives the header size in bytes at the end */
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ int i;
+
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4201, 16);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.layer_ctrl.
+ max_num_temporal_layers - 1, 3);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+ /* two zero bits per temporal layer above the first ... */
+ for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ /* ... and, only when there is more than one layer, two zero bits for
+ * every remaining index up to 8 */
+ if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) {
+ for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ }
+
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.chroma_format_idc);
+ /* the aligned (padded) dimensions are what gets signalled */
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.session_init.aligned_picture_width);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.session_init.aligned_picture_height);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_luma_minus8);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_chroma_minus8);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_max_poc - 4);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3);
+ //Only support CTBSize 64
+ radeon_uvd_enc_code_ue(enc,
+ 6 -
+ (enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3 + 3));
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.log2_min_transform_block_size_minus2);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.
+ log2_diff_max_min_transform_block_size);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.max_transform_hierarchy_depth_inter);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.max_transform_hierarchy_depth_intra);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ /* the stored flag is "disabled", the bitstream carries "enabled" */
+ radeon_uvd_enc_code_fixed_bits(enc,
+ !enc->enc_pic.hevc_spec_misc.amp_disabled,
+ 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.
+ sample_adaptive_offset_enabled_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.pcm_enabled_flag, 1);
+
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 0);
+ radeon_uvd_enc_code_ue(enc, 0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ strong_intra_smoothing_enabled, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ /* final 1 bit, then zero padding to the next byte boundary */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ /* bits written since radeon_uvd_enc_reset(), rounded up to bytes */
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
+/* Emit an INSERT_NALU_BUFFER package of type PPS.
+ *
+ * Same framing as the other NALU writers in this file: NALU type, a size
+ * dword filled in at the end, start code 0x00000001 plus the 16-bit value
+ * 0x4401 written without emulation prevention, then the payload written
+ * with emulation prevention on. The field order is the bitstream syntax and
+ * must not be changed. Reads enc_pic.hevc_spec_misc, enc_pic.hevc_deblock
+ * and enc_pic.log2_parallel_merge_level_minus2. */
+static void
+radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_PPS);
+ /* reserved dword, receives the header size in bytes at the end */
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4401, 16);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_se(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ constrained_intra_pred_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ loop_filter_across_slices_enabled, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ deblocking_filter_disabled, 1);
+
+ /* beta/tc offsets are only present when deblocking is not disabled */
+ if (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled) {
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.beta_offset_div2);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.tc_offset_div2);
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_parallel_merge_level_minus2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ /* final 1 bit, then zero padding to the next byte boundary */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ /* bits written since radeon_uvd_enc_reset(), rounded up to bytes */
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
+/* Emit an INSERT_NALU_BUFFER package of type VPS.
+ *
+ * Same framing as the other NALU writers in this file: NALU type, a size
+ * dword filled in at the end, start code 0x00000001 plus the 16-bit value
+ * 0x4001 written without emulation prevention, then the payload written
+ * with emulation prevention on. The field order is the bitstream syntax and
+ * must not be changed. The profile/tier/level portion repeats the same
+ * sequence of writes used by the SPS writer. */
+static void
+radeon_uvd_enc_nalu_vps_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_VPS);
+ /* reserved dword, receives the header size in bytes at the end */
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ int i;
+
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4001, 16);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x3, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.layer_ctrl.
+ max_num_temporal_layers - 1, 3);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xffff, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+ /* two zero bits per temporal layer above the first ... */
+ for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ /* ... and, only when there is more than one layer, two zero bits for
+ * every remaining index up to 8 */
+ if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) {
+ for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ /* final 1 bit, then zero padding to the next byte boundary */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ /* bits written since radeon_uvd_enc_reset(), rounded up to bytes */
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
+/* Emit an INSERT_NALU_BUFFER package of type AUD.
+ *
+ * After the start code, the 16-bit NAL header is written field by field
+ * (1 bit 0, 6-bit type 35, 6 bits 0, 3-bit value 1) with emulation
+ * prevention off. The payload is a 3-bit code chosen from
+ * enc_pic.picture_type: 0 for I and IDR, 1 for P, 2 for B and for any other
+ * type. It is followed by a 1 bit and zero padding to a byte boundary. */
+static void
+radeon_uvd_enc_nalu_aud_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_AUD);
+ /* reserved dword, receives the header size in bytes at the end */
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 35, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ switch (enc->enc_pic.picture_type) {
+ case PIPE_H265_ENC_PICTURE_TYPE_I:
+ case PIPE_H265_ENC_PICTURE_TYPE_IDR:
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00, 3);
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_P:
+ radeon_uvd_enc_code_fixed_bits(enc, 0x01, 3);
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_B:
+ radeon_uvd_enc_code_fixed_bits(enc, 0x02, 3);
+ break;
+ default:
+ radeon_uvd_enc_code_fixed_bits(enc, 0x02, 3);
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ /* bits written since radeon_uvd_enc_reset(), rounded up to bytes */
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
+/* Emit the SLICE_HEADER package: a slice-header template followed by an
+ * instruction list describing how to assemble the real header from it.
+ *
+ * The template bits are produced with the bit writer in several segments.
+ * After each segment the writer is flushed and a COPY instruction is
+ * recorded whose bit count is the number of bits written since the previous
+ * COPY. Between the segments, non-COPY instructions (FIRST_SLICE,
+ * SLICE_SEGMENT, DEPENDENT_SLICE_END, SLICE_QP_DELTA) are recorded with a
+ * bit count of 0, and the list is terminated by an END instruction.
+ *
+ * The package body is the template dwords, zero fill up to
+ * RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS, then
+ * RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS (instruction,
+ * num_bits) pairs. */
+static void
+radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc)
+{
+ uint32_t instruction[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 };
+ uint32_t num_bits[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 };
+ unsigned int inst_index = 0;
+ /* number of flushes so far; used as the start index of the zero fill */
+ unsigned int bit_index = 0;
+ unsigned int bits_copied = 0;
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SLICE_HEADER);
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+
+ /* segment 1: 16-bit NAL header (1 + 6 + 6 + 3 bits) */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.nal_unit_type, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3);
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_FIRST_SLICE;
+ inst_index++;
+
+ /* segment 2: one extra zero bit for NAL unit types 16..23, then ue(0) */
+ if ((enc->enc_pic.nal_unit_type >= 16)
+ && (enc->enc_pic.nal_unit_type <= 23))
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_ue(enc, 0x0);
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_SEGMENT;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_DEPENDENT_SLICE_END;
+ inst_index++;
+
+ /* segment 3: ue-coded value selected by picture type (2 for I/IDR,
+ * 1 for P/SKIP and unknown types, 0 for B) */
+ switch (enc->enc_pic.picture_type) {
+ case PIPE_H265_ENC_PICTURE_TYPE_I:
+ case PIPE_H265_ENC_PICTURE_TYPE_IDR:
+ radeon_uvd_enc_code_ue(enc, 0x2);
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_P:
+ case PIPE_H265_ENC_PICTURE_TYPE_SKIP:
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_B:
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ break;
+ default:
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ }
+
+ /* skipped for NAL unit types 19 and 20: frame_num modulo max_poc in
+ * log2_max_poc bits, followed by a picture-type dependent set of bits */
+ if ((enc->enc_pic.nal_unit_type != 19)
+ && (enc->enc_pic.nal_unit_type != 20)) {
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.frame_num %
+ enc->enc_pic.max_poc,
+ enc->enc_pic.log2_max_poc);
+ if (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ else {
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ }
+ }
+
+ if ((enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P) ||
+ (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_B)) {
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ cabac_init_flag, 1);
+ radeon_uvd_enc_code_ue(enc, 5 - enc->enc_pic.max_num_merge_cand);
+ }
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_QP_DELTA;
+ inst_index++;
+
+ /* optional segment 4: written only when loop filtering across slices is
+ * enabled and deblocking is not disabled */
+ if ((enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled) &&
+ (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled)) {
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ loop_filter_across_slices_enabled, 1);
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+ }
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_END;
+
+ /* Zero-fill the rest of the template area.
+ * NOTE(review): starting at bit_index assumes every flushed segment
+ * occupied exactly one dword of the template - confirm this holds for the
+ * largest log2_max_poc in use. */
+ for (int i = bit_index;
+ i < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++)
+ RADEON_ENC_CS(0x00000000);
+
+ /* the full instruction table is always emitted; unused entries are 0 */
+ for (int j = 0; j < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS;
+ j++) {
+ RADEON_ENC_CS(instruction[j]);
+ RADEON_ENC_CS(num_bits[j]);
+ }
+
+ RADEON_ENC_END();
+}
+
+/* Emit the ENCODE_CONTEXT_BUFFER package describing the buffer (enc->cpb)
+ * that holds the reconstructed pictures.
+ *
+ * The luma/chroma pitches are taken from the surface layout: from the
+ * legacy level-0 block count before GFX9, from the gfx9 surface pitch
+ * otherwise, each multiplied by the surface's bpe. Two reconstructed
+ * pictures are declared, with offsets derived from the pitches and the
+ * height aligned to 16. The package is padded with 136 zero dwords. */
+static void
+radeon_uvd_enc_ctx(struct radeon_uvd_encoder *enc)
+{
+ struct si_screen *rscreen = (struct si_screen *) enc->screen;
+
+ enc->enc_pic.ctx_buf.swizzle_mode = 0;
+ if (rscreen->info.chip_class < GFX9) {
+ enc->enc_pic.ctx_buf.rec_luma_pitch =
+ (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe);
+ enc->enc_pic.ctx_buf.rec_chroma_pitch =
+ (enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe);
+ }
+ else {
+ enc->enc_pic.ctx_buf.rec_luma_pitch =
+ enc->luma->u.gfx9.surf_pitch * enc->luma->bpe;
+ enc->enc_pic.ctx_buf.rec_chroma_pitch =
+ enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe;
+ }
+ enc->enc_pic.ctx_buf.num_reconstructed_pictures = 2;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_ENCODE_CONTEXT_BUFFER);
+ RADEON_ENC_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0);
+ RADEON_ENC_CS(0x00000000); // reserved
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.swizzle_mode);
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch);
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch);
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.num_reconstructed_pictures);
+ /* reconstructed_picture_1_luma_offset */
+ RADEON_ENC_CS(0x00000000);
+ /* reconstructed_picture_1_chroma_offset */
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch *
+ align(enc->base.height, 16));
+ /* reconstructed_picture_2_luma_offset */
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch *
+ align(enc->base.height, 16) * 3 / 2);
+ /* reconstructed_picture_2_chroma_offset */
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch *
+ align(enc->base.height, 16) * 5 / 2);
+
+ /* zero padding; NOTE(review): the meaning of these 136 dwords is not
+ * visible in this file */
+ for (int i = 0; i < 136; i++)
+ RADEON_ENC_CS(0x00000000);
+
+ RADEON_ENC_END();
+}
+
+/* Emit the VIDEO_BITSTREAM_BUFFER package: linear mode, the address of the
+ * output buffer enc->bs_handle (referenced for write in GTT), its size
+ * enc->bs_size, and a data offset of 0. */
+static void
+radeon_uvd_enc_bitstream(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.bit_buf.mode = RENC_UVD_SWIZZLE_MODE_LINEAR;
+ enc->enc_pic.bit_buf.video_bitstream_buffer_size = enc->bs_size;
+ enc->enc_pic.bit_buf.video_bitstream_data_offset = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_VIDEO_BITSTREAM_BUFFER);
+ RADEON_ENC_CS(enc->enc_pic.bit_buf.mode);
+ RADEON_ENC_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, 0);
+ RADEON_ENC_CS(enc->enc_pic.bit_buf.video_bitstream_buffer_size);
+ RADEON_ENC_CS(enc->enc_pic.bit_buf.video_bitstream_data_offset);
+ RADEON_ENC_END();
+}
+
+/* Emit the FEEDBACK_BUFFER package: linear mode, the address of the
+ * feedback buffer enc->fb (referenced for write), and the two size
+ * fields. */
+static void
+radeon_uvd_enc_feedback(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.fb_buf.mode = RENC_UVD_FEEDBACK_BUFFER_MODE_LINEAR;
+ /* NOTE(review): the units of these two sizes are not visible in this
+ * file; data_size (40) being larger than buffer_size (16) suggests they
+ * differ - confirm against the firmware interface. */
+ enc->enc_pic.fb_buf.feedback_buffer_size = 16;
+ enc->enc_pic.fb_buf.feedback_data_size = 40;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_FEEDBACK_BUFFER);
+ RADEON_ENC_CS(enc->enc_pic.fb_buf.mode);
+ RADEON_ENC_WRITE(enc->fb->res->buf, enc->fb->res->domains, 0x0);
+ RADEON_ENC_CS(enc->enc_pic.fb_buf.feedback_buffer_size);
+ RADEON_ENC_CS(enc->enc_pic.fb_buf.feedback_data_size);
+ RADEON_ENC_END();
+}
+
+/**
+ * Emit the RENC_UVD_IB_PARAM_INTRA_REFRESH package.
+ *
+ * The mode is always RENC_UVD_INTRA_REFRESH_MODE_NONE, with offset and
+ * region size both set to 0.
+ */
+static void
+radeon_uvd_enc_intra_refresh(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.intra_ref.intra_refresh_mode =
+ RENC_UVD_INTRA_REFRESH_MODE_NONE;
+ enc->enc_pic.intra_ref.offset = 0;
+ enc->enc_pic.intra_ref.region_size = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INTRA_REFRESH);
+ RADEON_ENC_CS(enc->enc_pic.intra_ref.intra_refresh_mode);
+ RADEON_ENC_CS(enc->enc_pic.intra_ref.offset);
+ RADEON_ENC_CS(enc->enc_pic.intra_ref.region_size);
+ RADEON_ENC_END();
+}
+
+/**
+ * Emit the RENC_UVD_IB_PARAM_RATE_CONTROL_PER_PICTURE package.
+ *
+ * @param enc      encoder whose command stream receives the package
+ * @param picture  picture description; accessed as a
+ *                 struct pipe_h265_enc_picture_desc
+ *
+ * The QP is always taken from pic->rc.quant_i_frames, the application QP
+ * range is fixed to 0..51, max_au_size is 0 and skip_frame_enable is false.
+ * Filler data and HRD enforcement follow the picture's rc settings.
+ */
+static void
+radeon_uvd_enc_rc_per_pic(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture;
+ enc->enc_pic.rc_per_pic.qp = pic->rc.quant_i_frames;
+ enc->enc_pic.rc_per_pic.min_qp_app = 0;
+ enc->enc_pic.rc_per_pic.max_qp_app = 51;
+ enc->enc_pic.rc_per_pic.max_au_size = 0;
+ enc->enc_pic.rc_per_pic.enabled_filler_data = pic->rc.fill_data_enable;
+ enc->enc_pic.rc_per_pic.skip_frame_enable = false;
+ enc->enc_pic.rc_per_pic.enforce_hrd = pic->rc.enforce_hrd;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_RATE_CONTROL_PER_PICTURE);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.qp);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.min_qp_app);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_qp_app);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_au_size);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.enabled_filler_data);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.skip_frame_enable);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.enforce_hrd);
+ RADEON_ENC_END();
+}
+
+/**
+ * Emit the RENC_UVD_IB_PARAM_ENCODE_PARAMS package for one picture.
+ *
+ * Translates enc->enc_pic.picture_type into the firmware picture type,
+ * takes the input luma/chroma pitches and plane offsets from either the
+ * legacy (chip_class < GFX9) or the GFX9 surface description, and picks the
+ * reference and reconstructed picture indices from the parity of frame_num.
+ */
+static void
+radeon_uvd_enc_encode_params_hevc(struct radeon_uvd_encoder *enc)
+{
+   struct si_screen *rscreen = (struct si_screen *) enc->screen;
+   const bool legacy_layout = rscreen->info.chip_class < GFX9;
+
+   switch (enc->enc_pic.picture_type) {
+   case PIPE_H265_ENC_PICTURE_TYPE_P:
+      enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_P;
+      break;
+   case PIPE_H265_ENC_PICTURE_TYPE_SKIP:
+      enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_P_SKIP;
+      break;
+   case PIPE_H265_ENC_PICTURE_TYPE_B:
+      enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_B;
+      break;
+   case PIPE_H265_ENC_PICTURE_TYPE_I:
+   case PIPE_H265_ENC_PICTURE_TYPE_IDR:
+   default:
+      /* I, IDR and any unrecognised type are all encoded as I pictures. */
+      enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_I;
+      break;
+   }
+
+   enc->enc_pic.enc_params.allowed_max_bitstream_size = enc->bs_size;
+
+   if (legacy_layout) {
+      enc->enc_pic.enc_params.input_pic_luma_pitch =
+         (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe);
+      enc->enc_pic.enc_params.input_pic_chroma_pitch =
+         (enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe);
+   } else {
+      enc->enc_pic.enc_params.input_pic_luma_pitch =
+         enc->luma->u.gfx9.surf_pitch * enc->luma->bpe;
+      enc->enc_pic.enc_params.input_pic_chroma_pitch =
+         enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe;
+   }
+
+   enc->enc_pic.enc_params.input_pic_swizzle_mode =
+      RENC_UVD_SWIZZLE_MODE_LINEAR;
+
+   /* Non-I pictures reference the slot used by the previous frame;
+    * I pictures get the index 0xFFFFFFFF. */
+   if (enc->enc_pic.enc_params.pic_type != RENC_UVD_PICTURE_TYPE_I)
+      enc->enc_pic.enc_params.reference_picture_index =
+         (enc->enc_pic.frame_num - 1) % 2;
+   else
+      enc->enc_pic.enc_params.reference_picture_index = 0xFFFFFFFF;
+
+   enc->enc_pic.enc_params.reconstructed_picture_index =
+      enc->enc_pic.frame_num % 2;
+
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_ENCODE_PARAMS);
+   RADEON_ENC_CS(enc->enc_pic.enc_params.pic_type);
+   RADEON_ENC_CS(enc->enc_pic.enc_params.allowed_max_bitstream_size);
+
+   /* Luma plane address first, then chroma. */
+   if (legacy_layout) {
+      RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+                      enc->luma->u.legacy.level[0].offset);
+      RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+                      enc->chroma->u.legacy.level[0].offset);
+   } else {
+      RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+                      enc->luma->u.gfx9.surf_offset);
+      RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+                      enc->chroma->u.gfx9.surf_offset);
+   }
+
+   RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_luma_pitch);
+   RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_chroma_pitch);
+   RADEON_ENC_CS(0x00000000); // reserved
+   RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_swizzle_mode);
+   RADEON_ENC_CS(enc->enc_pic.enc_params.reference_picture_index);
+   RADEON_ENC_CS(enc->enc_pic.enc_params.reconstructed_picture_index);
+   RADEON_ENC_END();
+}
+
+/** Emit a RENC_UVD_IB_OP_INITIALIZE package (command id only, no payload). */
+static void
+radeon_uvd_enc_op_init(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INITIALIZE);
+ RADEON_ENC_END();
+}
+
+/** Emit a RENC_UVD_IB_OP_CLOSE_SESSION package (command id only, no payload). */
+static void
+radeon_uvd_enc_op_close(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_CLOSE_SESSION);
+ RADEON_ENC_END();
+}
+
+/** Emit a RENC_UVD_IB_OP_ENCODE package (command id only, no payload). */
+static void
+radeon_uvd_enc_op_enc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_ENCODE);
+ RADEON_ENC_END();
+}
+
+/** Emit a RENC_UVD_IB_OP_INIT_RC package (command id only, no payload). */
+static void
+radeon_uvd_enc_op_init_rc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INIT_RC);
+ RADEON_ENC_END();
+}
+
+/**
+ * Emit a RENC_UVD_IB_OP_INIT_RC_VBV_BUFFER_LEVEL package (command id only,
+ * no payload).
+ */
+static void
+radeon_uvd_enc_op_init_rc_vbv(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INIT_RC_VBV_BUFFER_LEVEL);
+ RADEON_ENC_END();
+}
+
+/**
+ * Emit a RENC_UVD_IB_OP_SET_SPEED_ENCODING_MODE package (command id only,
+ * no payload).
+ */
+static void
+radeon_uvd_enc_op_speed(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_SET_SPEED_ENCODING_MODE);
+ RADEON_ENC_END();
+}
+
+/**
+ * Build the command stream that sets up an encode session.
+ *
+ * Emits session info, task info and the INITIALIZE op, followed by the HEVC
+ * session, slice-control, spec-misc and deblocking parameters, then layer
+ * control, rate-control and quality setup, and finally the two rate-control
+ * init ops.
+ *
+ * enc->total_task_size is reset after the session-info package, so the
+ * value stored through enc->p_task_size at the end covers the packages from
+ * task info onward.
+ *
+ * @param enc  encoder to emit commands for
+ * @param pic  picture description forwarded to the per-picture helpers
+ */
+static void
+begin(struct radeon_uvd_encoder *enc, struct pipe_picture_desc *pic)
+{
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+ radeon_uvd_enc_op_init(enc);
+
+ radeon_uvd_enc_session_init_hevc(enc);
+ radeon_uvd_enc_slice_control_hevc(enc);
+ radeon_uvd_enc_spec_misc_hevc(enc, pic);
+ radeon_uvd_enc_deblocking_filter_hevc(enc, pic);
+
+ radeon_uvd_enc_layer_control(enc);
+ radeon_uvd_enc_rc_session_init(enc, pic);
+ radeon_uvd_enc_quality_params(enc);
+ radeon_uvd_enc_layer_select(enc);
+ radeon_uvd_enc_rc_layer_init(enc, pic);
+ radeon_uvd_enc_layer_select(enc);
+ radeon_uvd_enc_rc_per_pic(enc, pic);
+ radeon_uvd_enc_op_init_rc(enc);
+ radeon_uvd_enc_op_init_rc_vbv(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+
+/**
+ * Build the command stream that encodes one picture.
+ *
+ * Emits session info and task info, then the AUD NAL unit. When
+ * enc->enc_pic.is_iframe is set, the VPS, PPS and SPS NAL units follow, in
+ * that order. After that come the slice header, the encode parameters, the
+ * context/bitstream/feedback buffers, the intra-refresh settings, and the
+ * speed-mode and ENCODE ops.
+ *
+ * enc->total_task_size is reset after the session-info package; the final
+ * total is stored through enc->p_task_size.
+ */
+static void
+encode(struct radeon_uvd_encoder *enc)
+{
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+
+ radeon_uvd_enc_nalu_aud_hevc(enc);
+
+ if (enc->enc_pic.is_iframe) {
+ radeon_uvd_enc_nalu_vps_hevc(enc);
+ radeon_uvd_enc_nalu_pps_hevc(enc);
+ radeon_uvd_enc_nalu_sps_hevc(enc);
+ }
+ radeon_uvd_enc_slice_header_hevc(enc);
+ radeon_uvd_enc_encode_params_hevc(enc);
+
+ radeon_uvd_enc_ctx(enc);
+ radeon_uvd_enc_bitstream(enc);
+ radeon_uvd_enc_feedback(enc);
+ radeon_uvd_enc_intra_refresh(enc);
+
+ radeon_uvd_enc_op_speed(enc);
+ radeon_uvd_enc_op_enc(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+
+/**
+ * Build the command stream that closes the session: session info, task
+ * info and the CLOSE_SESSION op. The task size accumulated after the
+ * session-info package is stored through enc->p_task_size.
+ */
+static void
+destroy(struct radeon_uvd_encoder *enc)
+{
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+ radeon_uvd_enc_op_close(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+
+/**
+ * Install this file's begin/encode/destroy implementations into the
+ * encoder's function pointers.
+ */
+void
+radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc)
+{
+ enc->begin = begin;
+ enc->encode = encode;
+ enc->destroy = destroy;
+}
--
2.7.4
Mark Thompson
2018-02-10 16:20:50 UTC
Permalink
Post by James Zhu
Implement required IBs for UVD HEVC encode.
---
src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 1115 +++++++++++++++++++++++
1 file changed, 1115 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
new file mode 100644
index 0000000..2b8156e
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
@@ -0,0 +1,1115 @@
...
+
+static void
+radeon_uvd_enc_emulation_prevention(struct radeon_uvd_encoder *enc,
+ unsigned char byte)
+{
+ if (enc->emulation_prevention) {
+ if ((enc->num_zeros >= 2)
+ && ((byte == 0x00) || (byte == 0x01) || (byte == 0x03))) {
Shouldn't { 0, 0, 2 } also trigger emulation prevention? Or am I not understanding what this function does?
Post by James Zhu
+ radeon_uvd_enc_output_one_byte(enc, 0x03);
+ enc->bits_output += 8;
+ enc->num_zeros = 0;
+ }
+ enc->num_zeros = (byte == 0 ? (enc->num_zeros + 1) : 0);
+ }
+}
+
...
Thanks,

- Mark
James Zhu
2018-02-13 15:58:24 UTC
Permalink
Post by Mark Thompson
Post by James Zhu
Implement required IBs for UVD HEVC encode.
---
src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 1115 +++++++++++++++++++++++
1 file changed, 1115 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
new file mode 100644
index 0000000..2b8156e
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
@@ -0,0 +1,1115 @@
...
+
+static void
+radeon_uvd_enc_emulation_prevention(struct radeon_uvd_encoder *enc,
+ unsigned char byte)
+{
+ if (enc->emulation_prevention) {
+ if ((enc->num_zeros >= 2)
+ && ((byte == 0x00) || (byte == 0x01) || (byte == 0x03))) {
Shouldn't { 0, 0, 2 } also trigger emulation prevention? Or am I not understanding what this function does?
Hi, Mark,
You are right. should add 00 00 02 case.
pls check the update in PATCH v5 4/8
Thanks! James
Post by Mark Thompson
Post by James Zhu
+ radeon_uvd_enc_output_one_byte(enc, 0x03);
+ enc->bits_output += 8;
+ enc->num_zeros = 0;
+ }
+ enc->num_zeros = (byte == 0 ? (enc->num_zeros + 1) : 0);
+ }
+}
+
...
Thanks,
- Mark
James Zhu
2018-02-12 15:14:29 UTC
Permalink
Implement required IBs for UVD HEVC encode.

Signed-off-by: James Zhu <***@amd.com>
---
src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 1131 +++++++++++++++++++++++
1 file changed, 1131 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c

diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
new file mode 100644
index 0000000..e507bae
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
@@ -0,0 +1,1131 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+#include "radeonsi/si_pipe.h"
+#include "radeon_video.h"
+#include "radeon_uvd_enc.h"
+
+/*
+ * Command-stream helpers. All of them expect a variable named `enc`
+ * (struct radeon_uvd_encoder *) to be in scope at the point of use.
+ *
+ * RADEON_ENC_CS(value)   appends one dword to the command stream.
+ * RADEON_ENC_BEGIN(cmd)  opens a brace scope, reserves one dword for the
+ *                        package size (pointed to by `begin`) and then
+ *                        appends the command id.
+ * RADEON_ENC_END()       stores the package size in bytes in the reserved
+ *                        dword, adds it to enc->total_task_size and closes
+ *                        the scope opened by RADEON_ENC_BEGIN.
+ * RADEON_ENC_READ / _WRITE / _READWRITE
+ *                        call radeon_uvd_enc_add_buffer with the matching
+ *                        usage flag.
+ *
+ * BEGIN and END are unbalanced on their own and must always be used as a
+ * pair within one function.
+ */
+#define RADEON_ENC_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value))
+#define RADEON_ENC_BEGIN(cmd) { \
+ uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
+RADEON_ENC_CS(cmd)
+#define RADEON_ENC_READ(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
+#define RADEON_ENC_WRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
+#define RADEON_ENC_READWRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
+#define RADEON_ENC_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; \
+ enc->total_task_size += *begin;}
+
+/* Left shift for byte 0..3 within a dword; byte 0 lands in the top 8 bits. */
+static const unsigned index_to_shifts[4] = { 24, 16, 8, 0 };
+
+/**
+ * Register a buffer with the command stream and append its virtual address
+ * (plus offset) as two dwords, upper 32 bits first.
+ *
+ * @param enc     encoder owning the command stream
+ * @param buf     buffer to reference
+ * @param usage   usage flag; RADEON_USAGE_SYNCHRONIZED is OR'ed in
+ * @param domain  domain passed through to cs_add_buffer
+ * @param offset  signed offset added to the buffer's virtual address
+ */
+static void
+radeon_uvd_enc_add_buffer(struct radeon_uvd_encoder *enc,
+                          struct pb_buffer *buf, enum radeon_bo_usage usage,
+                          enum radeon_bo_domain domain, signed offset)
+{
+   uint64_t gpu_va;
+
+   enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
+                          domain, RADEON_PRIO_VCE);
+
+   gpu_va = enc->ws->buffer_get_virtual_address(buf);
+   gpu_va += offset;
+
+   RADEON_ENC_CS(gpu_va >> 32);
+   RADEON_ENC_CS(gpu_va);
+}
+
+/**
+ * Switch emulation prevention on or off.
+ *
+ * The zero-byte counter is cleared only when the setting actually changes;
+ * a call with the current setting does nothing.
+ */
+static void
+radeon_uvd_enc_set_emulation_prevention(struct radeon_uvd_encoder *enc,
+                                        bool set)
+{
+   if (set == enc->emulation_prevention)
+      return;
+
+   enc->emulation_prevention = set;
+   enc->num_zeros = 0;
+}
+
+/**
+ * Pack one byte into the current command-stream dword, most significant
+ * byte first.
+ *
+ * The dword is zeroed when its first byte is written. After the fourth
+ * byte, the dword index advances and enc->byte_index wraps back to 0.
+ */
+static void
+radeon_uvd_enc_output_one_byte(struct radeon_uvd_encoder *enc,
+                               unsigned char byte)
+{
+   uint32_t *dword = &enc->cs->current.buf[enc->cs->current.cdw];
+
+   if (enc->byte_index == 0)
+      *dword = 0;
+
+   *dword |= ((unsigned int) (byte) << index_to_shifts[enc->byte_index]);
+   enc->byte_index++;
+
+   if (enc->byte_index >= 4) {
+      enc->byte_index = 0;
+      enc->cs->current.cdw++;
+   }
+}
+
+/**
+ * Insert an emulation_prevention_three_byte (0x03) ahead of `byte` when
+ * required, and update the running count of consecutive zero bytes.
+ *
+ * Called for every header byte immediately before that byte is output.
+ * Does nothing while enc->emulation_prevention is false.
+ *
+ * A NAL unit payload must not contain the byte-aligned sequences
+ * 00 00 00, 00 00 01, 00 00 02 or 00 00 03, so after two or more zero
+ * bytes every byte in the range 0x00..0x03 has to be escaped. The 0x02
+ * case was previously missed.
+ *
+ * @param enc   encoder whose bitstream state is updated
+ * @param byte  the byte about to be written
+ */
+static void
+radeon_uvd_enc_emulation_prevention(struct radeon_uvd_encoder *enc,
+                                    unsigned char byte)
+{
+   if (!enc->emulation_prevention)
+      return;
+
+   if ((enc->num_zeros >= 2) && (byte <= 0x03)) {
+      radeon_uvd_enc_output_one_byte(enc, 0x03);
+      enc->bits_output += 8;
+      /* The inserted 0x03 ends the current run of zero bytes. */
+      enc->num_zeros = 0;
+   }
+
+   enc->num_zeros = (byte == 0 ? (enc->num_zeros + 1) : 0);
+}
+
+/**
+ * Append the low `num_bits` bits of `value` to the header bitstream, most
+ * significant bit first.
+ *
+ * Bits are collected at the top of the 32-bit enc->shifter. Each time at
+ * least 8 bits are pending, the top byte is passed through emulation
+ * prevention and written with radeon_uvd_enc_output_one_byte.
+ *
+ * @param enc       encoder whose bitstream state is updated
+ * @param value     bits to write; only the low num_bits are used
+ * @param num_bits  number of bits to write. The mask uses a shift by
+ *                  (32 - num_bits), so values above 32 are not supported.
+ */
+static void
+radeon_uvd_enc_code_fixed_bits(struct radeon_uvd_encoder *enc,
+ unsigned int value, unsigned int num_bits)
+{
+ unsigned int bits_to_pack = 0;
+
+ while (num_bits > 0) {
+ /* Keep only the bits that are still to be written. */
+ unsigned int value_to_pack = value & (0xffffffff >> (32 - num_bits));
+ /* Write as many bits as still fit into the shifter. */
+ bits_to_pack =
+ num_bits >
+ (32 - enc->bits_in_shifter) ? (32 - enc->bits_in_shifter) : num_bits;
+
+ /* If not everything fits, take the most significant part first. */
+ if (bits_to_pack < num_bits)
+ value_to_pack = value_to_pack >> (num_bits - bits_to_pack);
+
+ enc->shifter |=
+ value_to_pack << (32 - enc->bits_in_shifter - bits_to_pack);
+ num_bits -= bits_to_pack;
+ enc->bits_in_shifter += bits_to_pack;
+
+ /* Drain complete bytes from the top of the shifter. */
+ while (enc->bits_in_shifter >= 8) {
+ unsigned char output_byte = (unsigned char) (enc->shifter >> 24);
+ enc->shifter <<= 8;
+ radeon_uvd_enc_emulation_prevention(enc, output_byte);
+ radeon_uvd_enc_output_one_byte(enc, output_byte);
+ enc->bits_in_shifter -= 8;
+ enc->bits_output += 8;
+ }
+ }
+}
+
+/**
+ * Reset the header bit-writer state: empty shifter, zero output-bit and
+ * zero-byte counters, byte index 0, emulation prevention off.
+ */
+static void
+radeon_uvd_enc_reset(struct radeon_uvd_encoder *enc)
+{
+   /* Bit accumulator. */
+   enc->shifter = 0;
+   enc->bits_in_shifter = 0;
+
+   /* Output bookkeeping. */
+   enc->bits_output = 0;
+   enc->byte_index = 0;
+
+   /* Emulation-prevention state. */
+   enc->emulation_prevention = false;
+   enc->num_zeros = 0;
+}
+
+/**
+ * Pad the bitstream with zero bits up to the next byte boundary. Does
+ * nothing when the stream is already byte aligned.
+ */
+static void
+radeon_uvd_enc_byte_align(struct radeon_uvd_encoder *enc)
+{
+   unsigned int pad_bits = (32 - enc->bits_in_shifter) % 8;
+
+   if (pad_bits == 0)
+      return;
+
+   radeon_uvd_enc_code_fixed_bits(enc, 0, pad_bits);
+}
+
+/**
+ * Flush the bit writer at the end of a header.
+ *
+ * If bits are still pending in the shifter, its top byte is written out
+ * (through emulation prevention), bits_output grows by the number of
+ * pending bits, and the shifter state and zero counter are cleared. If the
+ * current command-stream dword is only partly filled, the dword index
+ * moves on to the next one.
+ */
+static void
+radeon_uvd_enc_flush_headers(struct radeon_uvd_encoder *enc)
+{
+   if (enc->bits_in_shifter != 0) {
+      unsigned char last_byte = (unsigned char) (enc->shifter >> 24);
+
+      radeon_uvd_enc_emulation_prevention(enc, last_byte);
+      radeon_uvd_enc_output_one_byte(enc, last_byte);
+      enc->bits_output += enc->bits_in_shifter;
+
+      enc->shifter = 0;
+      enc->bits_in_shifter = 0;
+      enc->num_zeros = 0;
+   }
+
+   /* Close a partially filled dword. */
+   if (enc->byte_index > 0) {
+      enc->cs->current.cdw++;
+      enc->byte_index = 0;
+   }
+}
+
+/**
+ * Write `value` as an unsigned Exp-Golomb code, ue(v).
+ *
+ * The codeword is prefix_len zero bits followed by the (prefix_len + 1)-bit
+ * binary form of value + 1, where prefix_len = floor(log2(value + 1)).
+ *
+ * The prefix and the suffix are written with separate calls, so neither
+ * call asks radeon_uvd_enc_code_fixed_bits for more than 32 bits. Writing
+ * the whole codeword at once needs more than 32 bits for any value
+ * >= 0xFFFF. For shorter codewords the output is the same as a single
+ * combined write.
+ *
+ * @param enc    encoder whose bitstream state is updated
+ * @param value  value to encode; must be below UINT_MAX because value + 1
+ *               must not wrap to 0
+ */
+static void
+radeon_uvd_enc_code_ue(struct radeon_uvd_encoder *enc, unsigned int value)
+{
+   unsigned int ue_code = value + 1;
+   unsigned int prefix_len = 0;
+
+   /* prefix_len = floor(log2(ue_code)) */
+   for (unsigned int rest = ue_code >> 1; rest != 0; rest >>= 1)
+      prefix_len++;
+
+   /* A zero-length prefix is a no-op in radeon_uvd_enc_code_fixed_bits. */
+   radeon_uvd_enc_code_fixed_bits(enc, 0, prefix_len);
+   radeon_uvd_enc_code_fixed_bits(enc, ue_code, prefix_len + 1);
+}
+
+/**
+ * Write `value` as a signed Exp-Golomb code, se(v).
+ *
+ * Positive values map to 2 * value - 1, negative values to 2 * -value and
+ * zero to 0; the result is written with radeon_uvd_enc_code_ue.
+ */
+static void
+radeon_uvd_enc_code_se(struct radeon_uvd_encoder *enc, int value)
+{
+   unsigned int mapped;
+
+   if (value > 0)
+      mapped = ((unsigned int) (value) << 1) - 1;
+   else if (value < 0)
+      mapped = (unsigned int) (0 - value) << 1;
+   else
+      mapped = 0;
+
+   radeon_uvd_enc_code_ue(enc, mapped);
+}
+
+/**
+ * Emit the RENC_UVD_IB_PARAM_SESSION_INFO package.
+ *
+ * The payload is a reserved zero dword, the firmware interface version
+ * (major and minor numbers shifted into place and OR'ed together) and the
+ * address of enc->si, added as a read/write buffer.
+ */
+static void
+radeon_uvd_enc_session_info(struct radeon_uvd_encoder *enc)
+{
+ unsigned int interface_version =
+ ((RENC_UVD_FW_INTERFACE_MAJOR_VERSION <<
+ RENC_UVD_IF_MAJOR_VERSION_SHIFT) |
+ (RENC_UVD_FW_INTERFACE_MINOR_VERSION <<
+ RENC_UVD_IF_MINOR_VERSION_SHIFT));
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SESSION_INFO);
+ RADEON_ENC_CS(0x00000000); // reserved
+ RADEON_ENC_CS(interface_version);
+ RADEON_ENC_READWRITE(enc->si->res->buf, enc->si->res->domains, 0x0);
+ RADEON_ENC_END();
+}
+
+/**
+ * Emit the RENC_UVD_IB_PARAM_TASK_INFO package.
+ *
+ * Increments the task id and allows one feedback when need_feedback is
+ * set, none otherwise. The dword right after the command id is reserved
+ * for the total task size; its address is kept in enc->p_task_size so the
+ * caller can fill it in later.
+ *
+ * @param enc            encoder whose command stream receives the package
+ * @param need_feedback  whether a feedback slot is requested for this task
+ */
+static void
+radeon_uvd_enc_task_info(struct radeon_uvd_encoder *enc, bool need_feedback)
+{
+   enc->enc_pic.task_info.task_id++;
+   enc->enc_pic.task_info.allowed_max_num_feedbacks = need_feedback ? 1 : 0;
+
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_TASK_INFO);
+   /* Placeholder dword for the total task size. */
+   enc->p_task_size = &enc->cs->current.buf[enc->cs->current.cdw++];
+   RADEON_ENC_CS(enc->enc_pic.task_info.task_id);
+   RADEON_ENC_CS(enc->enc_pic.task_info.allowed_max_num_feedbacks);
+   RADEON_ENC_END();
+}
+
+/**
+ * Emit the RENC_UVD_IB_PARAM_SESSION_INIT package.
+ *
+ * The picture width is aligned to 64 and the height to 16. The padding
+ * values are the differences between the aligned and the original
+ * dimensions. Pre-encode is set to RENC_UVD_PREENCODE_MODE_NONE with
+ * chroma pre-encode disabled.
+ */
+static void
+radeon_uvd_enc_session_init_hevc(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.session_init.aligned_picture_width =
+ align(enc->base.width, 64);
+ enc->enc_pic.session_init.aligned_picture_height =
+ align(enc->base.height, 16);
+ enc->enc_pic.session_init.padding_width =
+ enc->enc_pic.session_init.aligned_picture_width - enc->base.width;
+ enc->enc_pic.session_init.padding_height =
+ enc->enc_pic.session_init.aligned_picture_height - enc->base.height;
+ enc->enc_pic.session_init.pre_encode_mode = RENC_UVD_PREENCODE_MODE_NONE;
+ enc->enc_pic.session_init.pre_encode_chroma_enabled = false;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SESSION_INIT);
+ RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_width);
+ RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_height);
+ RADEON_ENC_CS(enc->enc_pic.session_init.padding_width);
+ RADEON_ENC_CS(enc->enc_pic.session_init.padding_height);
+ RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_mode);
+ RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_chroma_enabled);
+ RADEON_ENC_END();
+}
+
+/**
+ * Emit the RENC_UVD_IB_PARAM_LAYER_CONTROL package with exactly one
+ * temporal layer (both the maximum and the current count are 1).
+ */
+static void
+radeon_uvd_enc_layer_control(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.layer_ctrl.max_num_temporal_layers = 1;
+ enc->enc_pic.layer_ctrl.num_temporal_layers = 1;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_LAYER_CONTROL);
+ RADEON_ENC_CS(enc->enc_pic.layer_ctrl.max_num_temporal_layers);
+ RADEON_ENC_CS(enc->enc_pic.layer_ctrl.num_temporal_layers);
+ RADEON_ENC_END();
+}
+
+/**
+ * Emit the RENC_UVD_IB_PARAM_LAYER_SELECT package; the temporal layer
+ * index is always 0.
+ */
+static void
+radeon_uvd_enc_layer_select(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.layer_sel.temporal_layer_index = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_LAYER_SELECT);
+ RADEON_ENC_CS(enc->enc_pic.layer_sel.temporal_layer_index);
+ RADEON_ENC_END();
+}
+
+/**
+ * Emit the RENC_UVD_IB_PARAM_SLICE_CONTROL package.
+ *
+ * Uses RENC_UVD_SLICE_CONTROL_MODE_FIXED_CTBS with the per-slice CTB count
+ * set to the number of 64x64 blocks covering the picture (both dimensions
+ * aligned up to 64). The per-slice-segment count is set to the same value.
+ */
+static void
+radeon_uvd_enc_slice_control_hevc(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.hevc_slice_ctrl.slice_control_mode =
+ RENC_UVD_SLICE_CONTROL_MODE_FIXED_CTBS;
+ /* Evaluated left to right as ((w / 64) * h) / 64; exact because the
+ * aligned height is a multiple of 64. */
+ enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice =
+ align(enc->base.width, 64) / 64 * align(enc->base.height, 64) / 64;
+ enc->enc_pic.hevc_slice_ctrl.
+ fixed_ctbs_per_slice.num_ctbs_per_slice_segment =
+ enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SLICE_CONTROL);
+ RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.slice_control_mode);
+ RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.
+ fixed_ctbs_per_slice.num_ctbs_per_slice);
+ RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.
+ fixed_ctbs_per_slice.num_ctbs_per_slice_segment);
+ RADEON_ENC_END();
+}
+
+/**
+ * Emit the RENC_UVD_IB_PARAM_SPEC_MISC package.
+ *
+ * @param enc      encoder whose command stream receives the package
+ * @param picture  picture description; accessed as a
+ *                 struct pipe_h265_enc_picture_desc
+ *
+ * Copies the coding-block size, strong intra smoothing, constrained intra
+ * prediction and CABAC init settings from the picture's seq/pic/slice
+ * fields. amp_disabled is the inverse of seq.amp_enabled_flag. Half-pel
+ * and quarter-pel are always enabled.
+ */
+static void
+radeon_uvd_enc_spec_misc_hevc(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture;
+ enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3 =
+ pic->seq.log2_min_luma_coding_block_size_minus3;
+ enc->enc_pic.hevc_spec_misc.amp_disabled = !pic->seq.amp_enabled_flag;
+ enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled =
+ pic->seq.strong_intra_smoothing_enabled_flag;
+ enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag =
+ pic->pic.constrained_intra_pred_flag;
+ enc->enc_pic.hevc_spec_misc.cabac_init_flag = pic->slice.cabac_init_flag;
+ enc->enc_pic.hevc_spec_misc.half_pel_enabled = 1;
+ enc->enc_pic.hevc_spec_misc.quarter_pel_enabled = 1;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SPEC_MISC);
+ RADEON_ENC_CS(enc->enc_pic.
+ hevc_spec_misc.log2_min_luma_coding_block_size_minus3);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.amp_disabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.cabac_init_flag);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.half_pel_enabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.quarter_pel_enabled);
+ RADEON_ENC_END();
+}
+
+/**
+ * Emit the RENC_UVD_IB_PARAM_RATE_CONTROL_SESSION_INIT package.
+ *
+ * @param enc      encoder whose command stream receives the package
+ * @param picture  picture description; accessed as a
+ *                 struct pipe_h265_enc_picture_desc
+ *
+ * Maps the pipe rate-control method onto the firmware method: both
+ * CONSTANT variants become CBR, both VARIABLE variants become
+ * PEAK_CONSTRAINED_VBR, and DISABLE or any other value becomes NONE. The
+ * VBV buffer level is copied from pic->rc.vbv_buf_lv.
+ */
+static void
+radeon_uvd_enc_rc_session_init(struct radeon_uvd_encoder *enc,
+                               struct pipe_picture_desc *picture)
+{
+   struct pipe_h265_enc_picture_desc *pic =
+      (struct pipe_h265_enc_picture_desc *) picture;
+
+   enc->enc_pic.rc_session_init.vbv_buffer_level = pic->rc.vbv_buf_lv;
+
+   switch (pic->rc.rate_ctrl_method) {
+   case PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT_SKIP:
+   case PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT:
+      enc->enc_pic.rc_session_init.rate_control_method =
+         RENC_UVD_RATE_CONTROL_METHOD_CBR;
+      break;
+   case PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE_SKIP:
+   case PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE:
+      enc->enc_pic.rc_session_init.rate_control_method =
+         RENC_UVD_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR;
+      break;
+   case PIPE_H265_ENC_RATE_CONTROL_METHOD_DISABLE:
+   default:
+      enc->enc_pic.rc_session_init.rate_control_method =
+         RENC_UVD_RATE_CONTROL_METHOD_NONE;
+      break;
+   }
+
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_RATE_CONTROL_SESSION_INIT);
+   RADEON_ENC_CS(enc->enc_pic.rc_session_init.rate_control_method);
+   RADEON_ENC_CS(enc->enc_pic.rc_session_init.vbv_buffer_level);
+   RADEON_ENC_END();
+}
+
+/**
+ * Emit the RENC_UVD_IB_PARAM_RATE_CONTROL_LAYER_INIT package.
+ *
+ * @param enc      encoder whose command stream receives the package
+ * @param picture  picture description; accessed as a
+ *                 struct pipe_h265_enc_picture_desc
+ *
+ * All values (target and peak bit rate, frame rate numerator/denominator,
+ * VBV buffer size, target and peak bits per picture) are copied unchanged
+ * from pic->rc.
+ */
+static void
+radeon_uvd_enc_rc_layer_init(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture;
+ enc->enc_pic.rc_layer_init.target_bit_rate = pic->rc.target_bitrate;
+ enc->enc_pic.rc_layer_init.peak_bit_rate = pic->rc.peak_bitrate;
+ enc->enc_pic.rc_layer_init.frame_rate_num = pic->rc.frame_rate_num;
+ enc->enc_pic.rc_layer_init.frame_rate_den = pic->rc.frame_rate_den;
+ enc->enc_pic.rc_layer_init.vbv_buffer_size = pic->rc.vbv_buffer_size;
+ enc->enc_pic.rc_layer_init.avg_target_bits_per_picture =
+ pic->rc.target_bits_picture;
+ enc->enc_pic.rc_layer_init.peak_bits_per_picture_integer =
+ pic->rc.peak_bits_picture_integer;
+ enc->enc_pic.rc_layer_init.peak_bits_per_picture_fractional =
+ pic->rc.peak_bits_picture_fraction;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_RATE_CONTROL_LAYER_INIT);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.target_bit_rate);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bit_rate);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.frame_rate_num);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.frame_rate_den);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.vbv_buffer_size);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.avg_target_bits_per_picture);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bits_per_picture_integer);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bits_per_picture_fractional);
+ RADEON_ENC_END();
+}
+
+/**
+ * Emit the RENC_UVD_IB_PARAM_DEBLOCKING_FILTER package.
+ *
+ * @param enc      encoder whose command stream receives the package
+ * @param picture  picture description; accessed as a
+ *                 struct pipe_h265_enc_picture_desc
+ *
+ * All six values are copied unchanged from pic->slice.
+ */
+static void
+radeon_uvd_enc_deblocking_filter_hevc(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture;
+ enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled =
+ pic->slice.slice_loop_filter_across_slices_enabled_flag;
+ enc->enc_pic.hevc_deblock.deblocking_filter_disabled =
+ pic->slice.slice_deblocking_filter_disabled_flag;
+ enc->enc_pic.hevc_deblock.beta_offset_div2 =
+ pic->slice.slice_beta_offset_div2;
+ enc->enc_pic.hevc_deblock.tc_offset_div2 = pic->slice.slice_tc_offset_div2;
+ enc->enc_pic.hevc_deblock.cb_qp_offset = pic->slice.slice_cb_qp_offset;
+ enc->enc_pic.hevc_deblock.cr_qp_offset = pic->slice.slice_cr_qp_offset;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_DEBLOCKING_FILTER);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.deblocking_filter_disabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.beta_offset_div2);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.tc_offset_div2);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cb_qp_offset);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cr_qp_offset);
+ RADEON_ENC_END();
+}
+
+/**
+ * Emit the RENC_UVD_IB_PARAM_QUALITY_PARAMS package with vbaq_mode,
+ * scene_change_sensitivity and scene_change_min_idr_interval all set to 0.
+ */
+static void
+radeon_uvd_enc_quality_params(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.quality_params.vbaq_mode = 0;
+ enc->enc_pic.quality_params.scene_change_sensitivity = 0;
+ enc->enc_pic.quality_params.scene_change_min_idr_interval = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_QUALITY_PARAMS);
+ RADEON_ENC_CS(enc->enc_pic.quality_params.vbaq_mode);
+ RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_sensitivity);
+ RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_min_idr_interval);
+ RADEON_ENC_END();
+}
+
+/**
+ * Emit a RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER package of type
+ * RENC_UVD_NALU_TYPE_SPS.
+ *
+ * The bit writer is reset. The 32-bit start code 0x00000001 and the 16-bit
+ * value 0x4201 are written with emulation prevention off; everything after
+ * them is written with emulation prevention on. The dword reserved right
+ * after the NALU type receives the number of bytes written since the
+ * reset, which includes the start code and any inserted 0x03 bytes.
+ *
+ * The order of the writes below defines the bitstream layout and must not
+ * be changed.
+ */
+static void
+radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_SPS);
+ /* Placeholder dword, filled in with the byte count at the end. */
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ int i;
+
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4201, 16);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.
+ layer_ctrl.max_num_temporal_layers - 1, 3);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+ /* Two zero bits per additional temporal layer ... */
+ for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ /* ... then two zero bits for each index from there up to 7, but only
+ * when there is more than one layer. */
+ if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) {
+ for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ }
+
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.chroma_format_idc);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.session_init.aligned_picture_width);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.session_init.aligned_picture_height);
+
+ /* The four crop values are written only if at least one is non-zero. */
+ int conformance_window_flag =
+ (enc->enc_pic.crop_top > 0) ||
+ (enc->enc_pic.crop_bottom > 0) ||
+ (enc->enc_pic.crop_left > 0) ||
+ (enc->enc_pic.crop_right > 0) ? 0x1 : 0x0;
+ radeon_uvd_enc_code_fixed_bits(enc, conformance_window_flag, 1);
+ if (conformance_window_flag == 1) {
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.crop_left);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.crop_right);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.crop_top);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.crop_bottom);
+ }
+
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_luma_minus8);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_chroma_minus8);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_max_poc - 4);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3);
+ /* Only support CTBSize 64 */
+ radeon_uvd_enc_code_ue(enc,
+ 6 -
+ (enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3 + 3));
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.log2_min_transform_block_size_minus2);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.
+ log2_diff_max_min_transform_block_size);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.max_transform_hierarchy_depth_inter);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.max_transform_hierarchy_depth_intra);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ !enc->enc_pic.hevc_spec_misc.amp_disabled,
+ 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.
+ sample_adaptive_offset_enabled_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.pcm_enabled_flag, 1);
+
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 0);
+ radeon_uvd_enc_code_ue(enc, 0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ strong_intra_smoothing_enabled, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ /* Bytes written since radeon_uvd_enc_reset(), rounded up. */
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
+/**
+ * Emit a RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER package of type
+ * RENC_UVD_NALU_TYPE_PPS.
+ *
+ * The bit writer is reset. The 32-bit start code 0x00000001 and the 16-bit
+ * value 0x4401 are written with emulation prevention off; everything after
+ * them is written with emulation prevention on. The dword reserved right
+ * after the NALU type receives the number of bytes written since the
+ * reset.
+ *
+ * The order of the writes below defines the bitstream layout and must not
+ * be changed.
+ */
+static void
+radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_PPS);
+ /* Placeholder dword, filled in with the byte count at the end. */
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4401, 16);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* output_flag_present_flag */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 3); /* num_extra_slice_header_bits */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_se(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ constrained_intra_pred_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ loop_filter_across_slices_enabled, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ deblocking_filter_disabled, 1);
+
+ /* The beta/tc offsets are written only while deblocking is enabled. */
+ if (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled) {
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.beta_offset_div2);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.tc_offset_div2);
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_parallel_merge_level_minus2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ /* Bytes written since radeon_uvd_enc_reset(), rounded up. */
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+/*
+ * Emit an RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER packet carrying a
+ * RENC_UVD_NALU_TYPE_VPS payload.
+ *
+ * The dword after the NALU type is reserved up front and filled in last
+ * with the payload size in bytes, rounded up from enc->bits_output. The
+ * 32-bit start code and the 16-bit value 0x4001 are written with emulation
+ * prevention disabled; every field after them is written with it enabled.
+ */
+static void
+radeon_uvd_enc_nalu_vps_hevc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_VPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; /* size slot, patched at the end */
+ int i;
+
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4001, 16); /* NOTE(review): presumably the 2-byte HEVC NAL unit header for a VPS - confirm */
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x3, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.layer_ctrl.
+ max_num_temporal_layers - 1, 3);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xffff, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+ for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++) /* two zero bits per extra temporal layer */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) { /* with extra layers, pad with 2-bit zeros up to index 8 */
+ for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* single 1 bit ahead of the zero padding added by byte_align */
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8; /* bits -> bytes, rounded up */
+ RADEON_ENC_END();
+}
+
+/*
+ * Emit an RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER packet carrying a
+ * RENC_UVD_NALU_TYPE_AUD payload.
+ *
+ * The dword after the NALU type is reserved up front and filled in last
+ * with the payload size in bytes. The start code and the four header
+ * fields (1 + 6 + 6 + 3 bits) are written with emulation prevention
+ * disabled; the 3-bit picture-type code and the trailing 1 bit are written
+ * with it enabled.
+ *
+ * Picture-type code: 0 for I/IDR, 1 for P and SKIP, 2 for B. SKIP shares
+ * the P code because the slice header and encode-params code in this file
+ * treat a SKIP picture as a P picture; without that case a SKIP picture hit
+ * the assert in debug builds and produced an AUD with no picture-type bits
+ * in release builds.
+ */
+static void
+radeon_uvd_enc_nalu_aud_hevc(struct radeon_uvd_encoder *enc)
+{
+   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+   RADEON_ENC_CS(RENC_UVD_NALU_TYPE_AUD);
+   uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+
+   radeon_uvd_enc_reset(enc);
+   radeon_uvd_enc_set_emulation_prevention(enc, false);
+   radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
+   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_uvd_enc_code_fixed_bits(enc, 35, 6);
+   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3);
+   radeon_uvd_enc_byte_align(enc);
+   radeon_uvd_enc_set_emulation_prevention(enc, true);
+
+   switch (enc->enc_pic.picture_type) {
+   case PIPE_H265_ENC_PICTURE_TYPE_I:
+   case PIPE_H265_ENC_PICTURE_TYPE_IDR:
+      radeon_uvd_enc_code_fixed_bits(enc, 0x00, 3);
+      break;
+   case PIPE_H265_ENC_PICTURE_TYPE_P:
+   case PIPE_H265_ENC_PICTURE_TYPE_SKIP:
+      radeon_uvd_enc_code_fixed_bits(enc, 0x01, 3);
+      break;
+   case PIPE_H265_ENC_PICTURE_TYPE_B:
+      radeon_uvd_enc_code_fixed_bits(enc, 0x02, 3);
+      break;
+   default:
+      assert(0 && "Unsupported picture type!");
+      break;
+   }
+
+   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+   radeon_uvd_enc_byte_align(enc);
+   radeon_uvd_enc_flush_headers(enc);
+   *size_in_bytes = (enc->bits_output + 7) / 8; /* bits -> bytes, rounded up */
+   RADEON_ENC_END();
+}
+/*
+ * Emit the RENC_UVD_IB_PARAM_SLICE_HEADER packet: a slice-header template
+ * followed by an instruction list.
+ *
+ * Header bits are coded in chunks. After each chunk the shifter is flushed
+ * and a RENC_UVD_HEADER_INSTRUCTION_COPY entry records how many bits were
+ * produced since the previous flush. Between the chunks, non-COPY
+ * instructions (FIRST_SLICE, SLICE_SEGMENT, DEPENDENT_SLICE_END,
+ * SLICE_QP_DELTA) are recorded with num_bits left at 0; the list is
+ * terminated with RENC_UVD_HEADER_INSTRUCTION_END.
+ *
+ * bit_index counts the flushes and is used as the starting index when
+ * zero-padding the template area.
+ * NOTE(review): this assumes each flushed chunk occupies exactly one dword
+ * of the command stream - confirm no chunk can exceed 32 bits.
+ */
+static void
+radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc)
+{
+ uint32_t instruction[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 };
+ uint32_t num_bits[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 };
+ unsigned int inst_index = 0;
+ unsigned int bit_index = 0;
+ unsigned int bits_copied = 0;
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SLICE_HEADER);
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.nal_unit_type, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3);
+
+ radeon_uvd_enc_flush_headers(enc); /* chunk 1: the 16 bits coded above */
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_FIRST_SLICE;
+ inst_index++;
+
+ if ((enc->enc_pic.nal_unit_type >= 16)
+ && (enc->enc_pic.nal_unit_type <= 23)) /* one extra zero bit for NAL unit types 16..23 */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_ue(enc, 0x0);
+
+ radeon_uvd_enc_flush_headers(enc); /* chunk 2 */
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_SEGMENT;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_DEPENDENT_SLICE_END;
+ inst_index++;
+
+ switch (enc->enc_pic.picture_type) { /* ue code: 2 for I/IDR, 1 for P/SKIP and unknown types, 0 for B */
+ case PIPE_H265_ENC_PICTURE_TYPE_I:
+ case PIPE_H265_ENC_PICTURE_TYPE_IDR:
+ radeon_uvd_enc_code_ue(enc, 0x2);
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_P:
+ case PIPE_H265_ENC_PICTURE_TYPE_SKIP:
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_B:
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ break;
+ default:
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ }
+
+ if ((enc->enc_pic.nal_unit_type != 19)
+ && (enc->enc_pic.nal_unit_type != 20)) { /* skipped for NAL unit types 19 and 20 */
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.frame_num %
+ enc->enc_pic.max_poc,
+ enc->enc_pic.log2_max_poc);
+ if (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ else {
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ }
+ }
+
+ if (enc->enc_pic.sample_adaptive_offset_enabled_flag)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* slice_sao_luma_flag */
+
+ if ((enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P) ||
+ (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_B)) {
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ cabac_init_flag, 1);
+ radeon_uvd_enc_code_ue(enc, 5 - enc->enc_pic.max_num_merge_cand);
+ }
+
+ radeon_uvd_enc_flush_headers(enc); /* chunk 3 */
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_QP_DELTA;
+ inst_index++;
+
+ if ((enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled) &&
+ (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled)) {
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ loop_filter_across_slices_enabled, 1);
+
+ radeon_uvd_enc_flush_headers(enc); /* optional chunk 4 */
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+ }
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_END;
+
+ for (int i = bit_index;
+ i < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++) /* zero-fill the rest of the template area */
+ RADEON_ENC_CS(0x00000000);
+
+ for (int j = 0; j < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS;
+ j++) { /* always emit the full table as (instruction, num_bits) pairs */
+ RADEON_ENC_CS(instruction[j]);
+ RADEON_ENC_CS(num_bits[j]);
+ }
+
+ RADEON_ENC_END();
+}
+/*
+ * Fill enc->enc_pic.ctx_buf and emit the
+ * RENC_UVD_IB_PARAM_ENCODE_CONTEXT_BUFFER packet.
+ *
+ * The reconstructed-picture pitches are taken from the luma/chroma
+ * surfaces: legacy level-0 nblk_x * bpe below GFX9, gfx9 surf_pitch * bpe
+ * otherwise. The packet holds the enc->cpb buffer address (read/write), a
+ * reserved dword, swizzle mode 0, both pitches, the picture count (2),
+ * four picture offsets derived from the pitches and the 16-aligned height,
+ * and 136 zero dwords.
+ */
+static void
+radeon_uvd_enc_ctx(struct radeon_uvd_encoder *enc)
+{
+ struct si_screen *rscreen = (struct si_screen *) enc->screen;
+
+ enc->enc_pic.ctx_buf.swizzle_mode = 0;
+ if (rscreen->info.chip_class < GFX9) {
+ enc->enc_pic.ctx_buf.rec_luma_pitch =
+ (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe);
+ enc->enc_pic.ctx_buf.rec_chroma_pitch =
+ (enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe);
+ }
+ else {
+ enc->enc_pic.ctx_buf.rec_luma_pitch =
+ enc->luma->u.gfx9.surf_pitch * enc->luma->bpe;
+ enc->enc_pic.ctx_buf.rec_chroma_pitch =
+ enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe;
+ }
+ enc->enc_pic.ctx_buf.num_reconstructed_pictures = 2;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_ENCODE_CONTEXT_BUFFER);
+ RADEON_ENC_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0);
+ RADEON_ENC_CS(0x00000000); // reserved
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.swizzle_mode);
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch);
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch);
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.num_reconstructed_pictures);
+ /* reconstructed_picture_1_luma_offset */
+ RADEON_ENC_CS(0x00000000);
+ /* reconstructed_picture_1_chroma_offset */
+ /* NOTE(review): uses rec_chroma_pitch while the picture 2 luma offset uses rec_luma_pitch - confirm the pitches are expected to match */
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch *
+ align(enc->base.height, 16));
+ /* reconstructed_picture_2_luma_offset */
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch *
+ align(enc->base.height, 16) * 3 / 2);
+ /* reconstructed_picture_2_chroma_offset */
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch *
+ align(enc->base.height, 16) * 5 / 2);
+
+ for (int i = 0; i < 136; i++) /* fixed run of 136 zero dwords closes the packet */
+ RADEON_ENC_CS(0x00000000);
+
+ RADEON_ENC_END();
+}
+/*
+ * Fill enc->enc_pic.bit_buf and emit the
+ * RENC_UVD_IB_PARAM_VIDEO_BITSTREAM_BUFFER packet: linear mode, the address
+ * of enc->bs_handle (added as a GTT write buffer at offset 0), the buffer
+ * size enc->bs_size and a data offset of 0.
+ */
+static void
+radeon_uvd_enc_bitstream(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.bit_buf.mode = RENC_UVD_SWIZZLE_MODE_LINEAR;
+ enc->enc_pic.bit_buf.video_bitstream_buffer_size = enc->bs_size;
+ enc->enc_pic.bit_buf.video_bitstream_data_offset = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_VIDEO_BITSTREAM_BUFFER);
+ RADEON_ENC_CS(enc->enc_pic.bit_buf.mode);
+ RADEON_ENC_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, 0);
+ RADEON_ENC_CS(enc->enc_pic.bit_buf.video_bitstream_buffer_size);
+ RADEON_ENC_CS(enc->enc_pic.bit_buf.video_bitstream_data_offset);
+ RADEON_ENC_END();
+}
+/*
+ * Fill enc->enc_pic.fb_buf and emit the RENC_UVD_IB_PARAM_FEEDBACK_BUFFER
+ * packet: linear mode, the address of enc->fb (added as a write buffer at
+ * offset 0), then the fixed values feedback_buffer_size = 16 and
+ * feedback_data_size = 40.
+ */
+static void
+radeon_uvd_enc_feedback(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.fb_buf.mode = RENC_UVD_FEEDBACK_BUFFER_MODE_LINEAR;
+ enc->enc_pic.fb_buf.feedback_buffer_size = 16;
+ enc->enc_pic.fb_buf.feedback_data_size = 40;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_FEEDBACK_BUFFER);
+ RADEON_ENC_CS(enc->enc_pic.fb_buf.mode);
+ RADEON_ENC_WRITE(enc->fb->res->buf, enc->fb->res->domains, 0x0);
+ RADEON_ENC_CS(enc->enc_pic.fb_buf.feedback_buffer_size);
+ RADEON_ENC_CS(enc->enc_pic.fb_buf.feedback_data_size);
+ RADEON_ENC_END();
+}
+/*
+ * Emit the RENC_UVD_IB_PARAM_INTRA_REFRESH packet with intra refresh
+ * turned off: mode RENC_UVD_INTRA_REFRESH_MODE_NONE, offset 0 and region
+ * size 0. The same values are stored in enc->enc_pic.intra_ref.
+ */
+static void
+radeon_uvd_enc_intra_refresh(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.intra_ref.intra_refresh_mode =
+ RENC_UVD_INTRA_REFRESH_MODE_NONE;
+ enc->enc_pic.intra_ref.offset = 0;
+ enc->enc_pic.intra_ref.region_size = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INTRA_REFRESH);
+ RADEON_ENC_CS(enc->enc_pic.intra_ref.intra_refresh_mode);
+ RADEON_ENC_CS(enc->enc_pic.intra_ref.offset);
+ RADEON_ENC_CS(enc->enc_pic.intra_ref.region_size);
+ RADEON_ENC_END();
+}
+/*
+ * Fill enc->enc_pic.rc_per_pic from the picture description and emit the
+ * RENC_UVD_IB_PARAM_RATE_CONTROL_PER_PICTURE packet.
+ *
+ * `picture` is cast to struct pipe_h265_enc_picture_desc. QP, filler-data
+ * enable and HRD enforcement come from pic->rc; the application QP range
+ * is fixed to 0..51, max_au_size to 0 and skip_frame_enable to false.
+ */
+static void
+radeon_uvd_enc_rc_per_pic(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture;
+ enc->enc_pic.rc_per_pic.qp = pic->rc.quant_i_frames; /* quant_i_frames is used whatever the picture type */
+ enc->enc_pic.rc_per_pic.min_qp_app = 0;
+ enc->enc_pic.rc_per_pic.max_qp_app = 51;
+ enc->enc_pic.rc_per_pic.max_au_size = 0;
+ enc->enc_pic.rc_per_pic.enabled_filler_data = pic->rc.fill_data_enable;
+ enc->enc_pic.rc_per_pic.skip_frame_enable = false;
+ enc->enc_pic.rc_per_pic.enforce_hrd = pic->rc.enforce_hrd;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_RATE_CONTROL_PER_PICTURE);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.qp);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.min_qp_app);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_qp_app);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_au_size);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.enabled_filler_data);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.skip_frame_enable);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.enforce_hrd);
+ RADEON_ENC_END();
+}
+/*
+ * Fill enc->enc_pic.enc_params and emit the
+ * RENC_UVD_IB_PARAM_ENCODE_PARAMS packet.
+ *
+ * The gallium picture type is mapped to a RENC_UVD_PICTURE_TYPE_* value
+ * (unknown types fall back to I). Input pitches and the luma/chroma
+ * offsets into enc->handle come from the legacy surface layout below GFX9
+ * and from the gfx9 layout otherwise. I pictures use reference index
+ * 0xFFFFFFFF; other pictures reference slot (frame_num - 1) % 2. The
+ * reconstructed picture goes to slot frame_num % 2.
+ */
+static void
+radeon_uvd_enc_encode_params_hevc(struct radeon_uvd_encoder *enc)
+{
+ struct si_screen *rscreen = (struct si_screen *) enc->screen;
+ switch (enc->enc_pic.picture_type) {
+ case PIPE_H265_ENC_PICTURE_TYPE_I:
+ case PIPE_H265_ENC_PICTURE_TYPE_IDR:
+ enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_I;
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_P:
+ enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_P;
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_SKIP:
+ enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_P_SKIP;
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_B:
+ enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_B;
+ break;
+ default:
+ enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_I;
+ }
+
+ enc->enc_pic.enc_params.allowed_max_bitstream_size = enc->bs_size;
+ if (rscreen->info.chip_class < GFX9) {
+ enc->enc_pic.enc_params.input_pic_luma_pitch =
+ (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe);
+ enc->enc_pic.enc_params.input_pic_chroma_pitch =
+ (enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe);
+ }
+ else {
+ enc->enc_pic.enc_params.input_pic_luma_pitch =
+ enc->luma->u.gfx9.surf_pitch * enc->luma->bpe;
+ enc->enc_pic.enc_params.input_pic_chroma_pitch =
+ enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe;
+ }
+ enc->enc_pic.enc_params.input_pic_swizzle_mode =
+ RENC_UVD_SWIZZLE_MODE_LINEAR;
+
+ if (enc->enc_pic.enc_params.pic_type == RENC_UVD_PICTURE_TYPE_I)
+ enc->enc_pic.enc_params.reference_picture_index = 0xFFFFFFFF;
+ else
+ enc->enc_pic.enc_params.reference_picture_index =
+ (enc->enc_pic.frame_num - 1) % 2;
+
+ enc->enc_pic.enc_params.reconstructed_picture_index =
+ enc->enc_pic.frame_num % 2;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_ENCODE_PARAMS);
+ RADEON_ENC_CS(enc->enc_pic.enc_params.pic_type);
+ RADEON_ENC_CS(enc->enc_pic.enc_params.allowed_max_bitstream_size);
+
+ if (rscreen->info.chip_class < GFX9) { /* input luma address, then chroma address */
+ RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->luma->u.legacy.level[0].offset);
+ RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->chroma->u.legacy.level[0].offset);
+ }
+ else {
+ RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->luma->u.gfx9.surf_offset);
+ RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->chroma->u.gfx9.surf_offset);
+ }
+ RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_luma_pitch);
+ RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_chroma_pitch);
+ RADEON_ENC_CS(0x00000000); // reserved
+ RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_swizzle_mode);
+ RADEON_ENC_CS(enc->enc_pic.enc_params.reference_picture_index);
+ RADEON_ENC_CS(enc->enc_pic.enc_params.reconstructed_picture_index);
+ RADEON_ENC_END();
+}
+/* Emit a payload-less RENC_UVD_IB_OP_INITIALIZE packet (size dword + opcode). */
+static void
+radeon_uvd_enc_op_init(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INITIALIZE);
+ RADEON_ENC_END();
+}
+/* Emit a payload-less RENC_UVD_IB_OP_CLOSE_SESSION packet (size dword + opcode). */
+static void
+radeon_uvd_enc_op_close(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_CLOSE_SESSION);
+ RADEON_ENC_END();
+}
+/* Emit a payload-less RENC_UVD_IB_OP_ENCODE packet (size dword + opcode). */
+static void
+radeon_uvd_enc_op_enc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_ENCODE);
+ RADEON_ENC_END();
+}
+/* Emit a payload-less RENC_UVD_IB_OP_INIT_RC packet (size dword + opcode). */
+static void
+radeon_uvd_enc_op_init_rc(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INIT_RC);
+ RADEON_ENC_END();
+}
+/* Emit a payload-less RENC_UVD_IB_OP_INIT_RC_VBV_BUFFER_LEVEL packet (size dword + opcode). */
+static void
+radeon_uvd_enc_op_init_rc_vbv(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INIT_RC_VBV_BUFFER_LEVEL);
+ RADEON_ENC_END();
+}
+/* Emit a payload-less RENC_UVD_IB_OP_SET_SPEED_ENCODING_MODE packet (size dword + opcode). */
+static void
+radeon_uvd_enc_op_speed(struct radeon_uvd_encoder *enc)
+{
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_SET_SPEED_ENCODING_MODE);
+ RADEON_ENC_END();
+}
+/*
+ * Build the session-start task in the command stream.
+ *
+ * Order: session info, task info, INITIALIZE op, session init, slice
+ * control, spec misc, deblocking filter, layer control, rate-control
+ * session init, quality params, layer select, rate-control layer init,
+ * layer select again, per-picture rate control, INIT_RC and
+ * INIT_RC_VBV_BUFFER_LEVEL ops.
+ *
+ * total_task_size is zeroed after the session-info packet, so that packet
+ * is not counted; the accumulated size of everything after it is written
+ * through enc->p_task_size at the end.
+ */
+static void
+begin(struct radeon_uvd_encoder *enc, struct pipe_picture_desc *pic)
+{
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+ radeon_uvd_enc_op_init(enc);
+
+ radeon_uvd_enc_session_init_hevc(enc);
+ radeon_uvd_enc_slice_control_hevc(enc);
+ radeon_uvd_enc_spec_misc_hevc(enc, pic);
+ radeon_uvd_enc_deblocking_filter_hevc(enc, pic);
+
+ radeon_uvd_enc_layer_control(enc);
+ radeon_uvd_enc_rc_session_init(enc, pic);
+ radeon_uvd_enc_quality_params(enc);
+ radeon_uvd_enc_layer_select(enc);
+ radeon_uvd_enc_rc_layer_init(enc, pic);
+ radeon_uvd_enc_layer_select(enc);
+ radeon_uvd_enc_rc_per_pic(enc, pic);
+ radeon_uvd_enc_op_init_rc(enc);
+ radeon_uvd_enc_op_init_rc_vbv(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+/*
+ * Build the per-frame encode task in the command stream.
+ *
+ * Order: session info, task info, AUD NAL unit, then - only when
+ * enc->enc_pic.is_iframe is set - VPS, PPS and SPS NAL units (in that
+ * order), slice header template, encode params, context buffer, bitstream
+ * buffer, feedback buffer, intra refresh, SET_SPEED_ENCODING_MODE op and
+ * ENCODE op.
+ *
+ * total_task_size is zeroed after the session-info packet; the accumulated
+ * size of everything after it is written through enc->p_task_size.
+ */
+static void
+encode(struct radeon_uvd_encoder *enc)
+{
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+
+ radeon_uvd_enc_nalu_aud_hevc(enc);
+
+ if (enc->enc_pic.is_iframe) {
+ radeon_uvd_enc_nalu_vps_hevc(enc);
+ radeon_uvd_enc_nalu_pps_hevc(enc);
+ radeon_uvd_enc_nalu_sps_hevc(enc);
+ }
+ radeon_uvd_enc_slice_header_hevc(enc);
+ radeon_uvd_enc_encode_params_hevc(enc);
+
+ radeon_uvd_enc_ctx(enc);
+ radeon_uvd_enc_bitstream(enc);
+ radeon_uvd_enc_feedback(enc);
+ radeon_uvd_enc_intra_refresh(enc);
+
+ radeon_uvd_enc_op_speed(enc);
+ radeon_uvd_enc_op_enc(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+/*
+ * Build the session-close task: session info, task info and the
+ * CLOSE_SESSION op. total_task_size is zeroed after the session-info
+ * packet; the size accumulated after it is written through
+ * enc->p_task_size.
+ */
+static void
+destroy(struct radeon_uvd_encoder *enc)
+{
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0;
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+ radeon_uvd_enc_op_close(enc);
+ *enc->p_task_size = (enc->total_task_size);
+}
+
+/*
+ * Install this file's begin/encode/destroy task builders as the encoder's
+ * callbacks.
+ */
+void
+radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc)
+{
+   enc->destroy = destroy;
+   enc->encode = encode;
+   enc->begin = begin;
+}
--
2.7.4
James Zhu
2018-02-13 15:56:29 UTC
Permalink
Implement required IBs for UVD HEVC encode.

Signed-off-by: James Zhu <***@amd.com>
---
src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 1132 +++++++++++++++++++++++
1 file changed, 1132 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c

diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
new file mode 100644
index 0000000..42a9fa9
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
@@ -0,0 +1,1132 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+#include "radeonsi/si_pipe.h"
+#include "radeon_video.h"
+#include "radeon_uvd_enc.h"
+/*
+ * Command-stream helpers. Each one expects a local variable named `enc`
+ * (struct radeon_uvd_encoder *) to be in scope.
+ *
+ * RADEON_ENC_CS(value)  - append one dword to the current command buffer.
+ * RADEON_ENC_BEGIN(cmd) - open a brace scope, reserve one dword (kept in the
+ *                         local `begin`) and append `cmd` after it.
+ * RADEON_ENC_END()      - store in the reserved dword the distance in bytes
+ *                         from `begin` to the current write position, add
+ *                         that value to enc->total_task_size and close the
+ *                         scope opened by RADEON_ENC_BEGIN. The two macros
+ *                         must therefore always be used as a pair.
+ * RADEON_ENC_READ / RADEON_ENC_WRITE / RADEON_ENC_READWRITE
+ *                       - call radeon_uvd_enc_add_buffer() with the matching
+ *                         RADEON_USAGE_* flag.
+ */
+#define RADEON_ENC_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value))
+#define RADEON_ENC_BEGIN(cmd) { \
+ uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
+RADEON_ENC_CS(cmd)
+#define RADEON_ENC_READ(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
+#define RADEON_ENC_WRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
+#define RADEON_ENC_READWRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
+#define RADEON_ENC_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; \
+ enc->total_task_size += *begin;}
+/*
+ * Left-shift amount for each byte position inside a dword: byte 0 goes to
+ * bits 31..24 and byte 3 to bits 7..0, so bytes are packed most-significant
+ * first.
+ */
+static const unsigned index_to_shifts[4] = { 24, 16, 8, 0 };
+
+/*
+ * Register `buf` with the command stream (requested usage plus
+ * RADEON_USAGE_SYNCHRONIZED, priority RADEON_PRIO_VCE) and append its
+ * virtual address plus `offset` as two dwords: upper 32 bits first, lower
+ * 32 bits second.
+ */
+static void
+radeon_uvd_enc_add_buffer(struct radeon_uvd_encoder *enc,
+                          struct pb_buffer *buf, enum radeon_bo_usage usage,
+                          enum radeon_bo_domain domain, signed offset)
+{
+   uint64_t gpu_va;
+
+   enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
+                          domain, RADEON_PRIO_VCE);
+
+   gpu_va = enc->ws->buffer_get_virtual_address(buf);
+   gpu_va += offset;
+
+   RADEON_ENC_CS(gpu_va >> 32);
+   RADEON_ENC_CS(gpu_va);
+}
+
+/*
+ * Turn emulation prevention on or off. The zero-byte run counter is
+ * cleared only when the setting actually changes.
+ */
+static void
+radeon_uvd_enc_set_emulation_prevention(struct radeon_uvd_encoder *enc,
+                                        bool set)
+{
+   if (enc->emulation_prevention == set)
+      return;
+
+   enc->emulation_prevention = set;
+   enc->num_zeros = 0;
+}
+
+/*
+ * Pack one byte into the command-stream dword at the current write
+ * position, most-significant byte first. The dword is cleared before its
+ * first byte is written, and the write position advances once four bytes
+ * have been packed.
+ */
+static void
+radeon_uvd_enc_output_one_byte(struct radeon_uvd_encoder *enc,
+                               unsigned char byte)
+{
+   uint32_t *dword = &enc->cs->current.buf[enc->cs->current.cdw];
+
+   if (enc->byte_index == 0)
+      *dword = 0;
+
+   *dword |= ((unsigned int) (byte) << index_to_shifts[enc->byte_index]);
+
+   if (++enc->byte_index >= 4) {
+      enc->byte_index = 0;
+      enc->cs->current.cdw++;
+   }
+}
+
+/*
+ * Emulation-prevention bookkeeping, called before `byte` is output.
+ *
+ * Does nothing while emulation prevention is off. Otherwise, if at least
+ * two zero bytes precede a byte in the range 0x00..0x03, a 0x03 byte is
+ * output first (and counted in bits_output) and the zero run is restarted.
+ * The run counter is then updated for `byte` itself.
+ */
+static void
+radeon_uvd_enc_emulation_prevention(struct radeon_uvd_encoder *enc,
+                                    unsigned char byte)
+{
+   if (!enc->emulation_prevention)
+      return;
+
+   if (enc->num_zeros >= 2 && byte <= 0x03) {
+      radeon_uvd_enc_output_one_byte(enc, 0x03);
+      enc->bits_output += 8;
+      enc->num_zeros = 0;
+   }
+
+   if (byte == 0)
+      enc->num_zeros = enc->num_zeros + 1;
+   else
+      enc->num_zeros = 0;
+}
+/*
+ * Append the low `num_bits` bits of `value` to the bitstream, most
+ * significant bit first.
+ *
+ * Bits are collected in the 32-bit enc->shifter, filled from the top. When
+ * the shifter holds 8 or more bits, whole bytes are taken from its top,
+ * passed through emulation prevention and written out, and
+ * enc->bits_output grows by 8 per byte. If the field does not fit in the
+ * free space of the shifter, the outer loop packs the upper part first and
+ * the rest on the next pass.
+ *
+ * NOTE(review): num_bits above 32 would make the mask shift negative -
+ * callers in this file pass at most 32.
+ */
+static void
+radeon_uvd_enc_code_fixed_bits(struct radeon_uvd_encoder *enc,
+ unsigned int value, unsigned int num_bits)
+{
+ unsigned int bits_to_pack = 0;
+
+ while (num_bits > 0) {
+ unsigned int value_to_pack = value & (0xffffffff >> (32 - num_bits)); /* keep only the bits not packed yet */
+ bits_to_pack =
+ num_bits >
+ (32 - enc->bits_in_shifter) ? (32 - enc->bits_in_shifter) : num_bits; /* min(num_bits, free bits in shifter) */
+
+ if (bits_to_pack < num_bits)
+ value_to_pack = value_to_pack >> (num_bits - bits_to_pack); /* take the upper part now, the rest on the next pass */
+
+ enc->shifter |=
+ value_to_pack << (32 - enc->bits_in_shifter - bits_to_pack);
+ num_bits -= bits_to_pack;
+ enc->bits_in_shifter += bits_to_pack;
+
+ while (enc->bits_in_shifter >= 8) { /* drain whole bytes from the top of the shifter */
+ unsigned char output_byte = (unsigned char) (enc->shifter >> 24);
+ enc->shifter <<= 8;
+ radeon_uvd_enc_emulation_prevention(enc, output_byte);
+ radeon_uvd_enc_output_one_byte(enc, output_byte);
+ enc->bits_in_shifter -= 8;
+ enc->bits_output += 8;
+ }
+ }
+}
+
+/*
+ * Return the bit writer to its initial state: empty shifter, zeroed
+ * counters, byte position 0 and emulation prevention off.
+ */
+static void
+radeon_uvd_enc_reset(struct radeon_uvd_encoder *enc)
+{
+   /* Shifter state. */
+   enc->shifter = 0;
+   enc->bits_in_shifter = 0;
+
+   /* Output accounting. */
+   enc->bits_output = 0;
+   enc->byte_index = 0;
+
+   /* Emulation-prevention state. */
+   enc->emulation_prevention = false;
+   enc->num_zeros = 0;
+}
+
+/*
+ * Pad the bitstream with zero bits up to the next byte boundary. Nothing
+ * is written when the stream is already byte aligned.
+ */
+static void
+radeon_uvd_enc_byte_align(struct radeon_uvd_encoder *enc)
+{
+   unsigned int pad_bits = (32 - enc->bits_in_shifter) % 8;
+
+   if (pad_bits == 0)
+      return;
+
+   radeon_uvd_enc_code_fixed_bits(enc, 0, pad_bits);
+}
+/*
+ * Flush the bit writer so the next write starts on a fresh dword.
+ *
+ * If the shifter still holds bits (fewer than 8), its top byte is output
+ * as-is, so the unused low bits of that byte are zero, and bits_output
+ * grows only by the number of valid bits. If the current dword is
+ * partially filled, the write position then moves to the next dword.
+ */
+static void
+radeon_uvd_enc_flush_headers(struct radeon_uvd_encoder *enc)
+{
+ if (enc->bits_in_shifter != 0) {
+ unsigned char output_byte = (unsigned char) (enc->shifter >> 24);
+ radeon_uvd_enc_emulation_prevention(enc, output_byte);
+ radeon_uvd_enc_output_one_byte(enc, output_byte);
+ enc->bits_output += enc->bits_in_shifter; /* count only the valid bits, not a whole byte */
+ enc->shifter = 0;
+ enc->bits_in_shifter = 0;
+ enc->num_zeros = 0;
+ }
+
+ if (enc->byte_index > 0) { /* skip the unused bytes of a partially filled dword */
+ enc->cs->current.cdw++;
+ enc->byte_index = 0;
+ }
+}
+
+/*
+ * Write `value` as an unsigned Exp-Golomb code: with k = floor(log2(value
+ * + 1)), the code is value + 1 written in 2k + 1 bits, which gives k
+ * leading zeros followed by the k + 1 significant bits.
+ */
+static void
+radeon_uvd_enc_code_ue(struct radeon_uvd_encoder *enc, unsigned int value)
+{
+   unsigned int code_num = value + 1;
+   int msb_pos = -1;
+
+   /* Position of the highest set bit of code_num. */
+   for (unsigned int rest = code_num; rest; rest = (rest >> 1))
+      msb_pos += 1;
+
+   unsigned int ue_length = (msb_pos << 1) + 1;
+   radeon_uvd_enc_code_fixed_bits(enc, code_num, ue_length);
+}
+
+/*
+ * Write `value` as a signed Exp-Golomb code. The value is mapped to an
+ * unsigned code number (0 -> 0, positive v -> 2v - 1, negative v -> -2v)
+ * and written with radeon_uvd_enc_code_ue().
+ */
+static void
+radeon_uvd_enc_code_se(struct radeon_uvd_encoder *enc, int value)
+{
+   unsigned int code_num;
+
+   if (value == 0)
+      code_num = 0;
+   else if (value < 0)
+      code_num = ((unsigned int) (0 - value) << 1);
+   else
+      code_num = (((unsigned int) (value) << 1) - 1);
+
+   radeon_uvd_enc_code_ue(enc, code_num);
+}
+/*
+ * Emit the RENC_UVD_IB_PARAM_SESSION_INFO packet: a reserved zero dword,
+ * the firmware interface version (major and minor numbers shifted into
+ * their fields and OR-ed together) and the address of the enc->si buffer,
+ * added as a read/write buffer at offset 0.
+ */
+static void
+radeon_uvd_enc_session_info(struct radeon_uvd_encoder *enc)
+{
+ unsigned int interface_version =
+ ((RENC_UVD_FW_INTERFACE_MAJOR_VERSION <<
+ RENC_UVD_IF_MAJOR_VERSION_SHIFT) |
+ (RENC_UVD_FW_INTERFACE_MINOR_VERSION <<
+ RENC_UVD_IF_MINOR_VERSION_SHIFT));
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SESSION_INFO);
+ RADEON_ENC_CS(0x00000000); // reserved
+ RADEON_ENC_CS(interface_version);
+ RADEON_ENC_READWRITE(enc->si->res->buf, enc->si->res->domains, 0x0);
+ RADEON_ENC_END();
+}
+/*
+ * Emit the RENC_UVD_IB_PARAM_TASK_INFO packet.
+ *
+ * task_id is incremented on every call. allowed_max_num_feedbacks is 1
+ * when `need_feedback` is set and 0 otherwise. The first payload dword is
+ * only reserved here: its address is saved in enc->p_task_size so the
+ * caller can store the total task size once the whole task is built.
+ */
+static void
+radeon_uvd_enc_task_info(struct radeon_uvd_encoder *enc, bool need_feedback)
+{
+ enc->enc_pic.task_info.task_id++;
+
+ if (need_feedback)
+ enc->enc_pic.task_info.allowed_max_num_feedbacks = 1;
+ else
+ enc->enc_pic.task_info.allowed_max_num_feedbacks = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_TASK_INFO);
+ enc->p_task_size = &enc->cs->current.buf[enc->cs->current.cdw++]; /* task-size slot, filled in later by the caller */
+ RADEON_ENC_CS(enc->enc_pic.task_info.task_id);
+ RADEON_ENC_CS(enc->enc_pic.task_info.allowed_max_num_feedbacks);
+ RADEON_ENC_END();
+}
+/*
+ * Fill enc->enc_pic.session_init and emit the
+ * RENC_UVD_IB_PARAM_SESSION_INIT packet.
+ *
+ * The picture width is aligned to 64 and the height to 16; the padding
+ * values are the differences between the aligned and the original sizes.
+ * Pre-encode is disabled (mode NONE, chroma off).
+ */
+static void
+radeon_uvd_enc_session_init_hevc(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.session_init.aligned_picture_width =
+ align(enc->base.width, 64);
+ enc->enc_pic.session_init.aligned_picture_height =
+ align(enc->base.height, 16);
+ enc->enc_pic.session_init.padding_width =
+ enc->enc_pic.session_init.aligned_picture_width - enc->base.width;
+ enc->enc_pic.session_init.padding_height =
+ enc->enc_pic.session_init.aligned_picture_height - enc->base.height;
+ enc->enc_pic.session_init.pre_encode_mode = RENC_UVD_PREENCODE_MODE_NONE;
+ enc->enc_pic.session_init.pre_encode_chroma_enabled = false;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SESSION_INIT);
+ RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_width);
+ RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_height);
+ RADEON_ENC_CS(enc->enc_pic.session_init.padding_width);
+ RADEON_ENC_CS(enc->enc_pic.session_init.padding_height);
+ RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_mode);
+ RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_chroma_enabled);
+ RADEON_ENC_END();
+}
+/*
+ * Emit the RENC_UVD_IB_PARAM_LAYER_CONTROL packet with both
+ * max_num_temporal_layers and num_temporal_layers fixed to 1. The same
+ * values are stored in enc->enc_pic.layer_ctrl.
+ */
+static void
+radeon_uvd_enc_layer_control(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.layer_ctrl.max_num_temporal_layers = 1;
+ enc->enc_pic.layer_ctrl.num_temporal_layers = 1;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_LAYER_CONTROL);
+ RADEON_ENC_CS(enc->enc_pic.layer_ctrl.max_num_temporal_layers);
+ RADEON_ENC_CS(enc->enc_pic.layer_ctrl.num_temporal_layers);
+ RADEON_ENC_END();
+}
+/*
+ * Emit the RENC_UVD_IB_PARAM_LAYER_SELECT packet selecting temporal layer
+ * index 0. The value is also stored in enc->enc_pic.layer_sel.
+ */
+static void
+radeon_uvd_enc_layer_select(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.layer_sel.temporal_layer_index = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_LAYER_SELECT);
+ RADEON_ENC_CS(enc->enc_pic.layer_sel.temporal_layer_index);
+ RADEON_ENC_END();
+}
+/*
+ * Fill enc->enc_pic.hevc_slice_ctrl and emit the
+ * RENC_UVD_IB_PARAM_SLICE_CONTROL packet in
+ * RENC_UVD_SLICE_CONTROL_MODE_FIXED_CTBS mode.
+ *
+ * The CTB count per slice is the number of 64x64 blocks covering the
+ * picture after aligning width and height to 64; the slice segment uses
+ * the same count.
+ */
+static void
+radeon_uvd_enc_slice_control_hevc(struct radeon_uvd_encoder *enc)
+{
+ enc->enc_pic.hevc_slice_ctrl.slice_control_mode =
+ RENC_UVD_SLICE_CONTROL_MODE_FIXED_CTBS;
+ enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice =
+ align(enc->base.width, 64) / 64 * align(enc->base.height, 64) / 64; /* evaluated left to right: ((w64 / 64) * h64) / 64 */
+ enc->enc_pic.hevc_slice_ctrl.
+ fixed_ctbs_per_slice.num_ctbs_per_slice_segment =
+ enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SLICE_CONTROL);
+ RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.slice_control_mode);
+ RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.
+ fixed_ctbs_per_slice.num_ctbs_per_slice);
+ RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.
+ fixed_ctbs_per_slice.num_ctbs_per_slice_segment);
+ RADEON_ENC_END();
+}
+/*
+ * Fill enc->enc_pic.hevc_spec_misc from the picture description and emit
+ * the RENC_UVD_IB_PARAM_SPEC_MISC packet.
+ *
+ * `picture` is cast to struct pipe_h265_enc_picture_desc. amp_disabled is
+ * the inverse of the sequence-level amp_enabled_flag; half-pel and
+ * quarter-pel are always enabled.
+ */
+static void
+radeon_uvd_enc_spec_misc_hevc(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture;
+ enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3 =
+ pic->seq.log2_min_luma_coding_block_size_minus3;
+ enc->enc_pic.hevc_spec_misc.amp_disabled = !pic->seq.amp_enabled_flag;
+ enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled =
+ pic->seq.strong_intra_smoothing_enabled_flag;
+ enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag =
+ pic->pic.constrained_intra_pred_flag;
+ enc->enc_pic.hevc_spec_misc.cabac_init_flag = pic->slice.cabac_init_flag;
+ enc->enc_pic.hevc_spec_misc.half_pel_enabled = 1;
+ enc->enc_pic.hevc_spec_misc.quarter_pel_enabled = 1;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SPEC_MISC);
+ RADEON_ENC_CS(enc->enc_pic.
+ hevc_spec_misc.log2_min_luma_coding_block_size_minus3);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.amp_disabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.cabac_init_flag);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.half_pel_enabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.quarter_pel_enabled);
+ RADEON_ENC_END();
+}
+/*
+ * Fill enc->enc_pic.rc_session_init from the picture description and emit
+ * the RENC_UVD_IB_PARAM_RATE_CONTROL_SESSION_INIT packet.
+ *
+ * Rate-control method mapping: DISABLE -> NONE, CONSTANT and
+ * CONSTANT_SKIP -> CBR, VARIABLE and VARIABLE_SKIP ->
+ * PEAK_CONSTRAINED_VBR, anything else -> NONE. The VBV buffer level is
+ * copied from pic->rc.vbv_buf_lv.
+ */
+static void
+radeon_uvd_enc_rc_session_init(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture;
+ enc->enc_pic.rc_session_init.vbv_buffer_level = pic->rc.vbv_buf_lv;
+ switch (pic->rc.rate_ctrl_method) {
+ case PIPE_H265_ENC_RATE_CONTROL_METHOD_DISABLE:
+ enc->enc_pic.rc_session_init.rate_control_method =
+ RENC_UVD_RATE_CONTROL_METHOD_NONE;
+ break;
+ case PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT_SKIP:
+ case PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT:
+ enc->enc_pic.rc_session_init.rate_control_method =
+ RENC_UVD_RATE_CONTROL_METHOD_CBR;
+ break;
+ case PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE_SKIP:
+ case PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE:
+ enc->enc_pic.rc_session_init.rate_control_method =
+ RENC_UVD_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR;
+ break;
+ default:
+ enc->enc_pic.rc_session_init.rate_control_method =
+ RENC_UVD_RATE_CONTROL_METHOD_NONE;
+ }
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_RATE_CONTROL_SESSION_INIT);
+ RADEON_ENC_CS(enc->enc_pic.rc_session_init.rate_control_method);
+ RADEON_ENC_CS(enc->enc_pic.rc_session_init.vbv_buffer_level);
+ RADEON_ENC_END();
+}
+/*
+ * Copy the rate-control settings of the picture description (target and
+ * peak bit rate, frame rate numerator/denominator, VBV buffer size, target
+ * bits per picture, integer and fractional peak bits per picture) into
+ * enc->enc_pic.rc_layer_init and emit them, in that order, as the
+ * RENC_UVD_IB_PARAM_RATE_CONTROL_LAYER_INIT packet.
+ *
+ * `picture` is cast to struct pipe_h265_enc_picture_desc.
+ */
+static void
+radeon_uvd_enc_rc_layer_init(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture;
+ enc->enc_pic.rc_layer_init.target_bit_rate = pic->rc.target_bitrate;
+ enc->enc_pic.rc_layer_init.peak_bit_rate = pic->rc.peak_bitrate;
+ enc->enc_pic.rc_layer_init.frame_rate_num = pic->rc.frame_rate_num;
+ enc->enc_pic.rc_layer_init.frame_rate_den = pic->rc.frame_rate_den;
+ enc->enc_pic.rc_layer_init.vbv_buffer_size = pic->rc.vbv_buffer_size;
+ enc->enc_pic.rc_layer_init.avg_target_bits_per_picture =
+ pic->rc.target_bits_picture;
+ enc->enc_pic.rc_layer_init.peak_bits_per_picture_integer =
+ pic->rc.peak_bits_picture_integer;
+ enc->enc_pic.rc_layer_init.peak_bits_per_picture_fractional =
+ pic->rc.peak_bits_picture_fraction;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_RATE_CONTROL_LAYER_INIT);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.target_bit_rate);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bit_rate);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.frame_rate_num);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.frame_rate_den);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.vbv_buffer_size);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.avg_target_bits_per_picture);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bits_per_picture_integer);
+ RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bits_per_picture_fractional);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_deblocking_filter_hevc(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{ /* Copy the deblocking and chroma QP offset fields from picture->slice into enc_pic.hevc_deblock and write them out. */
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture; /* unchecked downcast: picture is treated as an H.265 encode descriptor */
+ enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled =
+ pic->slice.slice_loop_filter_across_slices_enabled_flag;
+ enc->enc_pic.hevc_deblock.deblocking_filter_disabled =
+ pic->slice.slice_deblocking_filter_disabled_flag;
+ enc->enc_pic.hevc_deblock.beta_offset_div2 =
+ pic->slice.slice_beta_offset_div2;
+ enc->enc_pic.hevc_deblock.tc_offset_div2 = pic->slice.slice_tc_offset_div2;
+ enc->enc_pic.hevc_deblock.cb_qp_offset = pic->slice.slice_cb_qp_offset;
+ enc->enc_pic.hevc_deblock.cr_qp_offset = pic->slice.slice_cr_qp_offset; /* the hevc_deblock values stored here are read again by the PPS and slice-header writers in this file */
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_DEBLOCKING_FILTER); /* the six values below are written in this fixed order */
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.deblocking_filter_disabled);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.beta_offset_div2);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.tc_offset_div2);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cb_qp_offset);
+ RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cr_qp_offset);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_quality_params(struct radeon_uvd_encoder *enc)
+{ /* Set every quality parameter to 0 and write the three values under RENC_UVD_IB_PARAM_QUALITY_PARAMS. */
+ enc->enc_pic.quality_params.vbaq_mode = 0;
+ enc->enc_pic.quality_params.scene_change_sensitivity = 0;
+ enc->enc_pic.quality_params.scene_change_min_idr_interval = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_QUALITY_PARAMS);
+ RADEON_ENC_CS(enc->enc_pic.quality_params.vbaq_mode);
+ RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_sensitivity);
+ RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_min_idr_interval);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc)
+{ /* Build the SPS NAL unit bit by bit from enc_pic state and queue it under RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_SPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; /* claims the next command-stream dword (cdw is post-incremented); it is filled in at the end of this function */
+ int i;
+
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false); /* emulation prevention stays off for the leading bytes and is switched on for the payload below */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32); /* presumably the 4-byte start code -- TODO confirm */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4201, 16); /* presumably the 2-byte NAL unit header for an SPS -- TODO confirm */
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.
+ layer_ctrl.max_num_temporal_layers - 1, 3);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+ for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++) /* one 2-bit zero field per temporal layer beyond the first */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) { /* with more than one layer, pad with 2-bit zero fields up to index 8 */
+ for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ }
+
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.chroma_format_idc);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.session_init.aligned_picture_width);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.session_init.aligned_picture_height);
+
+ int conformance_window_flag =
+ (enc->enc_pic.crop_top > 0) ||
+ (enc->enc_pic.crop_bottom > 0) ||
+ (enc->enc_pic.crop_left > 0) ||
+ (enc->enc_pic.crop_right > 0) ? 0x1 : 0x0; /* ?: binds looser than ||, so this is 1 when any crop value is positive */
+ radeon_uvd_enc_code_fixed_bits(enc, conformance_window_flag, 1);
+ if (conformance_window_flag == 1) { /* crop values are only written when the flag is set */
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.crop_left);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.crop_right);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.crop_top);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.crop_bottom);
+ }
+
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_luma_minus8);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_chroma_minus8);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_max_poc - 4);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3);
+ /* Only support CTBSize 64 */
+ radeon_uvd_enc_code_ue(enc,
+ 6 -
+ (enc->enc_pic.hevc_spec_misc.
+ log2_min_luma_coding_block_size_minus3 + 3)); /* log2(64) = 6 minus the log2 of the minimum coding block size */
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.log2_min_transform_block_size_minus2);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.
+ log2_diff_max_min_transform_block_size);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.max_transform_hierarchy_depth_inter);
+ radeon_uvd_enc_code_ue(enc,
+ enc->enc_pic.max_transform_hierarchy_depth_intra);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ !enc->enc_pic.hevc_spec_misc.amp_disabled,
+ 1); /* the stored flag is a "disabled" flag, so it is inverted before being written */
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.
+ sample_adaptive_offset_enabled_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.pcm_enabled_flag, 1); /* NOTE(review): no PCM-specific fields follow, so this presumably relies on pcm_enabled_flag being 0 -- confirm */
+
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 1);
+ radeon_uvd_enc_code_ue(enc, 0);
+ radeon_uvd_enc_code_ue(enc, 0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ strong_intra_smoothing_enabled, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* final 1 bit before byte alignment */
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8; /* bit count rounded up to whole bytes, stored in the dword claimed above */
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc)
+{ /* Build the PPS NAL unit bit by bit from enc_pic state and queue it under RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_PPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; /* claims the next command-stream dword (cdw is post-incremented); it is filled in at the end of this function */
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false); /* emulation prevention stays off for the leading bytes and is switched on for the payload below */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32); /* presumably the 4-byte start code -- TODO confirm */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4401, 16); /* presumably the 2-byte NAL unit header for a PPS -- TODO confirm */
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* output_flag_present_flag */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 3); /* num_extra_slice_header_bits */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_se(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ constrained_intra_pred_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ loop_filter_across_slices_enabled, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ deblocking_filter_disabled, 1);
+
+ if (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled) { /* the beta/tc offsets are only written while deblocking is enabled */
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.beta_offset_div2);
+ radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.tc_offset_div2);
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_parallel_merge_level_minus2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* final 1 bit before byte alignment */
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8; /* bit count rounded up to whole bytes, stored in the dword claimed above */
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_vps_hevc(struct radeon_uvd_encoder *enc)
+{ /* Build the VPS NAL unit bit by bit from enc_pic state and queue it under RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_VPS);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; /* claims the next command-stream dword (cdw is post-incremented); it is filled in at the end of this function */
+ int i;
+
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false); /* emulation prevention stays off for the leading bytes and is switched on for the payload below */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32); /* presumably the 4-byte start code -- TODO confirm */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x4001, 16); /* presumably the 2-byte NAL unit header for a VPS -- TODO confirm */
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x3, 2);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.layer_ctrl.
+ max_num_temporal_layers - 1, 3);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xffff, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2); /* from here through general_level_idc the values match what the SPS writer in this file emits */
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+ for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++) /* one 2-bit zero field per temporal layer beyond the first */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+
+ if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) { /* with more than one layer, pad with 2-bit zero fields up to index 8 */
+ for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* final 1 bit before byte alignment */
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8; /* bit count rounded up to whole bytes, stored in the dword claimed above */
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_nalu_aud_hevc(struct radeon_uvd_encoder *enc)
+{ /* Build the AUD NAL unit, whose 3-bit payload depends on enc_pic.picture_type, and queue it under RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER. */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
+ RADEON_ENC_CS(RENC_UVD_NALU_TYPE_AUD);
+ uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; /* claims the next command-stream dword (cdw is post-incremented); it is filled in at the end of this function */
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false); /* emulation prevention stays off for the leading bytes and is switched on for the payload below */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32); /* presumably the 4-byte start code -- TODO confirm */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* this and the next three fields (1 + 6 + 6 + 3 bits, type value 35) presumably form the NAL unit header -- TODO confirm */
+ radeon_uvd_enc_code_fixed_bits(enc, 35, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3);
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, true);
+ switch (enc->enc_pic.picture_type) { /* writes a 3-bit code: 0 for I/IDR, 1 for P, 2 for B */
+ case PIPE_H265_ENC_PICTURE_TYPE_I:
+ case PIPE_H265_ENC_PICTURE_TYPE_IDR:
+ radeon_uvd_enc_code_fixed_bits(enc, 0x00, 3);
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_P:
+ radeon_uvd_enc_code_fixed_bits(enc, 0x01, 3);
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_B:
+ radeon_uvd_enc_code_fixed_bits(enc, 0x02, 3);
+ break;
+ default:
+ assert(0 && "Unsupported picture type!"); /* NOTE(review): PIPE_H265_ENC_PICTURE_TYPE_SKIP is not listed here although the slice-header and encode-params writers in this file handle it; with assertions compiled out no type bits are written for it -- confirm SKIP cannot reach this function */
+ }
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); /* final 1 bit before byte alignment */
+
+ radeon_uvd_enc_byte_align(enc);
+ radeon_uvd_enc_flush_headers(enc);
+ *size_in_bytes = (enc->bits_output + 7) / 8; /* bit count rounded up to whole bytes, stored in the dword claimed above */
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc)
+{ /* Build the slice-header template: literal header bits interleaved with an instruction list, written under RENC_UVD_IB_PARAM_SLICE_HEADER. */
+ uint32_t instruction[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 }; /* at most 9 entries are filled below; the array bound is defined elsewhere */
+ uint32_t num_bits[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 }; /* only set for COPY entries; all other entries stay 0 */
+ unsigned int inst_index = 0;
+ unsigned int bit_index = 0; /* incremented once per radeon_uvd_enc_flush_headers call; used only as the start index of the zero-padding loop at the end */
+ unsigned int bits_copied = 0; /* value of enc->bits_output at the previous COPY instruction */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SLICE_HEADER);
+ radeon_uvd_enc_reset(enc);
+ radeon_uvd_enc_set_emulation_prevention(enc, false);
+
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.nal_unit_type, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3);
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied; /* number of header bits produced since the previous COPY */
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_FIRST_SLICE;
+ inst_index++;
+
+ if ((enc->enc_pic.nal_unit_type >= 16)
+ && (enc->enc_pic.nal_unit_type <= 23))
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* one extra zero bit for NAL unit types 16..23 */
+
+ radeon_uvd_enc_code_ue(enc, 0x0);
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_SEGMENT;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_DEPENDENT_SLICE_END;
+ inst_index++;
+
+ switch (enc->enc_pic.picture_type) { /* writes ue(2) for I/IDR, ue(1) for P/SKIP and unknown types, ue(0) for B */
+ case PIPE_H265_ENC_PICTURE_TYPE_I:
+ case PIPE_H265_ENC_PICTURE_TYPE_IDR:
+ radeon_uvd_enc_code_ue(enc, 0x2);
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_P:
+ case PIPE_H265_ENC_PICTURE_TYPE_SKIP:
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_B:
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ break;
+ default:
+ radeon_uvd_enc_code_ue(enc, 0x1);
+ }
+
+ if ((enc->enc_pic.nal_unit_type != 19)
+ && (enc->enc_pic.nal_unit_type != 20)) { /* skipped entirely for NAL unit types 19 and 20 */
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.frame_num %
+ enc->enc_pic.max_poc,
+ enc->enc_pic.log2_max_poc); /* NOTE(review): divides by enc_pic.max_poc; assumes it is non-zero -- confirm where it is set */
+ if (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+ else { /* every non-P type, including SKIP and B, takes this branch */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ radeon_uvd_enc_code_ue(enc, 0x0);
+ }
+ }
+
+ if (enc->enc_pic.sample_adaptive_offset_enabled_flag)
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* slice_sao_luma_flag */
+
+ if ((enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P) ||
+ (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_B)) { /* P and B only: one zero bit, cabac_init_flag, then 5 - max_num_merge_cand */
+ radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_spec_misc.
+ cabac_init_flag, 1);
+ radeon_uvd_enc_code_ue(enc, 5 - enc->enc_pic.max_num_merge_cand);
+ }
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_QP_DELTA;
+ inst_index++;
+
+ if ((enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled) &&
+ (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled)) { /* a trailing flag bit and a fourth COPY are added only when both conditions hold */
+ radeon_uvd_enc_code_fixed_bits(enc,
+ enc->enc_pic.hevc_deblock.
+ loop_filter_across_slices_enabled, 1);
+
+ radeon_uvd_enc_flush_headers(enc);
+ bit_index++;
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
+ num_bits[inst_index] = enc->bits_output - bits_copied;
+ bits_copied = enc->bits_output;
+ inst_index++;
+ }
+
+ instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_END;
+
+ for (int i = bit_index;
+ i < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++) /* zero-pads the template area; presumably assumes each flush above wrote exactly one dword -- TODO confirm */
+ RADEON_ENC_CS(0x00000000);
+
+ for (int j = 0; j < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS;
+ j++) { /* every slot is written as an (instruction, num_bits) pair, including the unused zero-initialised ones */
+ RADEON_ENC_CS(instruction[j]);
+ RADEON_ENC_CS(num_bits[j]);
+ }
+
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_ctx(struct radeon_uvd_encoder *enc)
+{ /* Describe the encode context buffer (enc->cpb): pitches and offsets of the two reconstructed pictures. */
+ struct si_screen *rscreen = (struct si_screen *) enc->screen;
+
+ enc->enc_pic.ctx_buf.swizzle_mode = 0;
+ if (rscreen->info.chip_class < GFX9) { /* pitch = width in blocks * bytes per element; the field used depends on the surface layout generation */
+ enc->enc_pic.ctx_buf.rec_luma_pitch =
+ (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe);
+ enc->enc_pic.ctx_buf.rec_chroma_pitch =
+ (enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe);
+ }
+ else {
+ enc->enc_pic.ctx_buf.rec_luma_pitch =
+ enc->luma->u.gfx9.surf_pitch * enc->luma->bpe;
+ enc->enc_pic.ctx_buf.rec_chroma_pitch =
+ enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe;
+ }
+ enc->enc_pic.ctx_buf.num_reconstructed_pictures = 2;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_ENCODE_CONTEXT_BUFFER);
+ RADEON_ENC_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0);
+ RADEON_ENC_CS(0x00000000); // reserved
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.swizzle_mode);
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch);
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch);
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.num_reconstructed_pictures);
+ /* reconstructed_picture_1_luma_offset */
+ RADEON_ENC_CS(0x00000000);
+ /* reconstructed_picture_1_chroma_offset */
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch *
+ align(enc->base.height, 16)); /* offsets are multiples of pitch * height aligned to 16: 1x here, then 3/2x and 5/2x below */
+ /* reconstructed_picture_2_luma_offset */
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch *
+ align(enc->base.height, 16) * 3 / 2);
+ /* reconstructed_picture_2_chroma_offset */
+ RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch *
+ align(enc->base.height, 16) * 5 / 2);
+
+ for (int i = 0; i < 136; i++) /* 136 zero dwords follow; what they stand for is not visible in this file section */
+ RADEON_ENC_CS(0x00000000);
+
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_bitstream(struct radeon_uvd_encoder *enc)
+{ /* Describe the output bitstream buffer: linear mode, size enc->bs_size, data offset 0, backed by enc->bs_handle. */
+ enc->enc_pic.bit_buf.mode = RENC_UVD_SWIZZLE_MODE_LINEAR;
+ enc->enc_pic.bit_buf.video_bitstream_buffer_size = enc->bs_size;
+ enc->enc_pic.bit_buf.video_bitstream_data_offset = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_VIDEO_BITSTREAM_BUFFER);
+ RADEON_ENC_CS(enc->enc_pic.bit_buf.mode);
+ RADEON_ENC_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, 0); /* the buffer reference sits between the mode and the size/offset values */
+ RADEON_ENC_CS(enc->enc_pic.bit_buf.video_bitstream_buffer_size);
+ RADEON_ENC_CS(enc->enc_pic.bit_buf.video_bitstream_data_offset);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_feedback(struct radeon_uvd_encoder *enc)
+{ /* Describe the feedback buffer (enc->fb) with fixed mode and size values. */
+ enc->enc_pic.fb_buf.mode = RENC_UVD_FEEDBACK_BUFFER_MODE_LINEAR;
+ enc->enc_pic.fb_buf.feedback_buffer_size = 16;
+ enc->enc_pic.fb_buf.feedback_data_size = 40; /* NOTE(review): larger than feedback_buffer_size above; the units of the two fields are not visible here -- confirm */
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_FEEDBACK_BUFFER);
+ RADEON_ENC_CS(enc->enc_pic.fb_buf.mode);
+ RADEON_ENC_WRITE(enc->fb->res->buf, enc->fb->res->domains, 0x0); /* the buffer reference sits between the mode and the two size values */
+ RADEON_ENC_CS(enc->enc_pic.fb_buf.feedback_buffer_size);
+ RADEON_ENC_CS(enc->enc_pic.fb_buf.feedback_data_size);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_intra_refresh(struct radeon_uvd_encoder *enc)
+{ /* Write the intra-refresh settings; this version always selects mode NONE with zero offset and region size. */
+ enc->enc_pic.intra_ref.intra_refresh_mode =
+ RENC_UVD_INTRA_REFRESH_MODE_NONE;
+ enc->enc_pic.intra_ref.offset = 0;
+ enc->enc_pic.intra_ref.region_size = 0;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INTRA_REFRESH);
+ RADEON_ENC_CS(enc->enc_pic.intra_ref.intra_refresh_mode);
+ RADEON_ENC_CS(enc->enc_pic.intra_ref.offset);
+ RADEON_ENC_CS(enc->enc_pic.intra_ref.region_size);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_rc_per_pic(struct radeon_uvd_encoder *enc,
+ struct pipe_picture_desc *picture)
+{ /* Fill enc_pic.rc_per_pic from picture->rc plus fixed defaults and write the seven values out. */
+ struct pipe_h265_enc_picture_desc *pic =
+ (struct pipe_h265_enc_picture_desc *) picture; /* unchecked downcast: picture is treated as an H.265 encode descriptor */
+ enc->enc_pic.rc_per_pic.qp = pic->rc.quant_i_frames; /* the I-frame quantiser is used regardless of picture type */
+ enc->enc_pic.rc_per_pic.min_qp_app = 0;
+ enc->enc_pic.rc_per_pic.max_qp_app = 51;
+ enc->enc_pic.rc_per_pic.max_au_size = 0;
+ enc->enc_pic.rc_per_pic.enabled_filler_data = pic->rc.fill_data_enable;
+ enc->enc_pic.rc_per_pic.skip_frame_enable = false;
+ enc->enc_pic.rc_per_pic.enforce_hrd = pic->rc.enforce_hrd;
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_RATE_CONTROL_PER_PICTURE); /* the seven values below are written in this fixed order */
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.qp);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.min_qp_app);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_qp_app);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_au_size);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.enabled_filler_data);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.skip_frame_enable);
+ RADEON_ENC_CS(enc->enc_pic.rc_per_pic.enforce_hrd);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_encode_params_hevc(struct radeon_uvd_encoder *enc)
+{ /* Write the per-frame encode parameters: picture type, input surface location and pitches, reference and reconstructed picture indices. */
+ struct si_screen *rscreen = (struct si_screen *) enc->screen;
+ switch (enc->enc_pic.picture_type) { /* map the pipe picture type to the RENC_UVD one; I and IDR both map to I, unknown types fall back to I */
+ case PIPE_H265_ENC_PICTURE_TYPE_I:
+ case PIPE_H265_ENC_PICTURE_TYPE_IDR:
+ enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_I;
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_P:
+ enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_P;
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_SKIP:
+ enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_P_SKIP;
+ break;
+ case PIPE_H265_ENC_PICTURE_TYPE_B:
+ enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_B;
+ break;
+ default:
+ enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_I;
+ }
+
+ enc->enc_pic.enc_params.allowed_max_bitstream_size = enc->bs_size;
+ if (rscreen->info.chip_class < GFX9) { /* pitch = width in blocks * bytes per element; the field used depends on the surface layout generation */
+ enc->enc_pic.enc_params.input_pic_luma_pitch =
+ (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe);
+ enc->enc_pic.enc_params.input_pic_chroma_pitch =
+ (enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe);
+ }
+ else {
+ enc->enc_pic.enc_params.input_pic_luma_pitch =
+ enc->luma->u.gfx9.surf_pitch * enc->luma->bpe;
+ enc->enc_pic.enc_params.input_pic_chroma_pitch =
+ enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe;
+ }
+ enc->enc_pic.enc_params.input_pic_swizzle_mode =
+ RENC_UVD_SWIZZLE_MODE_LINEAR;
+
+ if (enc->enc_pic.enc_params.pic_type == RENC_UVD_PICTURE_TYPE_I)
+ enc->enc_pic.enc_params.reference_picture_index = 0xFFFFFFFF; /* I pictures get the all-ones index instead of a reference slot */
+ else
+ enc->enc_pic.enc_params.reference_picture_index =
+ (enc->enc_pic.frame_num - 1) % 2; /* the slot the previous frame was reconstructed into */
+
+ enc->enc_pic.enc_params.reconstructed_picture_index =
+ enc->enc_pic.frame_num % 2; /* alternates between slots 0 and 1 from frame to frame */
+
+ RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_ENCODE_PARAMS);
+ RADEON_ENC_CS(enc->enc_pic.enc_params.pic_type);
+ RADEON_ENC_CS(enc->enc_pic.enc_params.allowed_max_bitstream_size);
+
+ if (rscreen->info.chip_class < GFX9) { /* luma then chroma reference into enc->handle, at the plane offsets of the active surface layout */
+ RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->luma->u.legacy.level[0].offset);
+ RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->chroma->u.legacy.level[0].offset);
+ }
+ else {
+ RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->luma->u.gfx9.surf_offset);
+ RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->chroma->u.gfx9.surf_offset);
+ }
+ RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_luma_pitch);
+ RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_chroma_pitch);
+ RADEON_ENC_CS(0x00000000); // reserved
+ RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_swizzle_mode);
+ RADEON_ENC_CS(enc->enc_pic.enc_params.reference_picture_index);
+ RADEON_ENC_CS(enc->enc_pic.enc_params.reconstructed_picture_index);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_op_init(struct radeon_uvd_encoder *enc)
+{ /* Write an empty RENC_UVD_IB_OP_INITIALIZE entry (BEGIN/END with no payload). */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INITIALIZE);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_op_close(struct radeon_uvd_encoder *enc)
+{ /* Write an empty RENC_UVD_IB_OP_CLOSE_SESSION entry (BEGIN/END with no payload). */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_CLOSE_SESSION);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_op_enc(struct radeon_uvd_encoder *enc)
+{ /* Write an empty RENC_UVD_IB_OP_ENCODE entry (BEGIN/END with no payload). */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_ENCODE);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_op_init_rc(struct radeon_uvd_encoder *enc)
+{ /* Write an empty RENC_UVD_IB_OP_INIT_RC entry (BEGIN/END with no payload). */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INIT_RC);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_op_init_rc_vbv(struct radeon_uvd_encoder *enc)
+{ /* Write an empty RENC_UVD_IB_OP_INIT_RC_VBV_BUFFER_LEVEL entry (BEGIN/END with no payload). */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INIT_RC_VBV_BUFFER_LEVEL);
+ RADEON_ENC_END();
+}
+
+static void
+radeon_uvd_enc_op_speed(struct radeon_uvd_encoder *enc)
+{ /* Write an empty RENC_UVD_IB_OP_SET_SPEED_ENCODING_MODE entry (BEGIN/END with no payload). */
+ RADEON_ENC_BEGIN(RENC_UVD_IB_OP_SET_SPEED_ENCODING_MODE);
+ RADEON_ENC_END();
+}
+
+static void
+begin(struct radeon_uvd_encoder *enc, struct pipe_picture_desc *pic)
+{ /* Session start-up sequence: session and task info, OP_INITIALIZE, the HEVC session parameters, then the rate-control set-up and its two init ops. */
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0; /* reset after session_info and before task_info, so the total presumably counts from the task info onwards -- TODO confirm */
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+ radeon_uvd_enc_op_init(enc);
+
+ radeon_uvd_enc_session_init_hevc(enc);
+ radeon_uvd_enc_slice_control_hevc(enc);
+ radeon_uvd_enc_spec_misc_hevc(enc, pic);
+ radeon_uvd_enc_deblocking_filter_hevc(enc, pic);
+
+ radeon_uvd_enc_layer_control(enc);
+ radeon_uvd_enc_rc_session_init(enc, pic);
+ radeon_uvd_enc_quality_params(enc);
+ radeon_uvd_enc_layer_select(enc); /* layer_select is issued before each of the two rate-control writers below */
+ radeon_uvd_enc_rc_layer_init(enc, pic);
+ radeon_uvd_enc_layer_select(enc);
+ radeon_uvd_enc_rc_per_pic(enc, pic);
+ radeon_uvd_enc_op_init_rc(enc);
+ radeon_uvd_enc_op_init_rc_vbv(enc);
+ *enc->p_task_size = (enc->total_task_size); /* store the accumulated size through p_task_size, which is set up outside this function */
+}
+
+static void
+encode(struct radeon_uvd_encoder *enc)
+{ /* Per-frame sequence: session and task info, AUD (plus VPS/PPS/SPS on I-frames), slice header, encode parameters, buffer descriptions, then the speed and encode ops. */
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0; /* reset after session_info and before task_info, so the total presumably counts from the task info onwards -- TODO confirm */
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+
+ radeon_uvd_enc_nalu_aud_hevc(enc);
+
+ if (enc->enc_pic.is_iframe) { /* parameter sets are only inserted when is_iframe is set */
+ radeon_uvd_enc_nalu_vps_hevc(enc);
+ radeon_uvd_enc_nalu_pps_hevc(enc); /* NOTE(review): the PPS is queued before the SPS here -- confirm this order is intended */
+ radeon_uvd_enc_nalu_sps_hevc(enc);
+ }
+ radeon_uvd_enc_slice_header_hevc(enc);
+ radeon_uvd_enc_encode_params_hevc(enc);
+
+ radeon_uvd_enc_ctx(enc);
+ radeon_uvd_enc_bitstream(enc);
+ radeon_uvd_enc_feedback(enc);
+ radeon_uvd_enc_intra_refresh(enc);
+
+ radeon_uvd_enc_op_speed(enc);
+ radeon_uvd_enc_op_enc(enc);
+ *enc->p_task_size = (enc->total_task_size); /* store the accumulated size through p_task_size, which is set up outside this function */
+}
+
+static void
+destroy(struct radeon_uvd_encoder *enc)
+{ /* Session tear-down sequence: session and task info followed by OP_CLOSE_SESSION. */
+ radeon_uvd_enc_session_info(enc);
+ enc->total_task_size = 0; /* reset after session_info and before task_info, so the total presumably counts from the task info onwards -- TODO confirm */
+ radeon_uvd_enc_task_info(enc, enc->need_feedback);
+ radeon_uvd_enc_op_close(enc);
+ *enc->p_task_size = (enc->total_task_size); /* store the accumulated size through p_task_size, which is set up outside this function */
+}
+
+void
+radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc)
+{ /* Only non-static function in this part of the file: installs the static begin/encode/destroy implementations above as the encoder's callbacks. */
+ enc->begin = begin;
+ enc->encode = encode;
+ enc->destroy = destroy;
+}
--
2.7.4
James Zhu
2018-02-06 20:05:42 UTC
Permalink
Implement UVD hevc encode functions

Signed-off-by: James Zhu <***@amd.com>
---
src/gallium/drivers/radeon/radeon_uvd_enc.c | 370 ++++++++++++++++++++++++++++
1 file changed, 370 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c

diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c b/src/gallium/drivers/radeon/radeon_uvd_enc.c
new file mode 100644
index 0000000..f162589
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c
@@ -0,0 +1,370 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+
+#include "radeonsi/si_pipe.h"
+#include "radeon_video.h"
+#include "radeon_uvd_enc.h"
+
+/**
+ * Copy the per-picture HEVC parameters from the gallium picture
+ * description into enc->enc_pic and derive the cropping and POC fields.
+ *
+ * \param enc  encoder whose enc_pic state is filled in
+ * \param pic  picture description supplied by the state tracker
+ */
+static void
+radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc,
+                         struct pipe_h265_enc_picture_desc *pic)
+{
+   int poc_bits_left;
+
+   /* Picture-level values. */
+   enc->enc_pic.picture_type = pic->picture_type;
+   enc->enc_pic.frame_num = pic->frame_num;
+   enc->enc_pic.pic_order_cnt = pic->pic_order_cnt;
+   enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type;
+   enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0;
+   enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1;
+   enc->enc_pic.not_referenced = pic->not_referenced;
+
+   /* Both IDR and plain I pictures set is_idr. */
+   switch (pic->picture_type) {
+   case PIPE_H265_ENC_PICTURE_TYPE_IDR:
+   case PIPE_H265_ENC_PICTURE_TYPE_I:
+      enc->enc_pic.is_idr = 1;
+      break;
+   default:
+      enc->enc_pic.is_idr = 0;
+      break;
+   }
+
+   /* Cropping: half of the padding added by aligning the size to 16. */
+   enc->enc_pic.crop_left = 0;
+   enc->enc_pic.crop_right = (align(enc->base.width, 16) - enc->base.width) / 2;
+   enc->enc_pic.crop_top = 0;
+   enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - enc->base.height) / 2;
+
+   enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag;
+   enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc;
+   enc->enc_pic.general_level_idc = pic->seq.general_level_idc;
+
+   /* log2_max_poc is the number of significant bits in max_poc. */
+   enc->enc_pic.max_poc = pic->seq.intra_period;
+   enc->enc_pic.log2_max_poc = 0;
+   poc_bits_left = enc->enc_pic.max_poc;
+   while (poc_bits_left != 0) {
+      poc_bits_left = (poc_bits_left >> 1);
+      enc->enc_pic.log2_max_poc++;
+   }
+
+   enc->enc_pic.chroma_format_idc = pic->seq.chroma_format_idc;
+   enc->enc_pic.pic_width_in_luma_samples = pic->seq.pic_width_in_luma_samples;
+   enc->enc_pic.pic_height_in_luma_samples = pic->seq.pic_height_in_luma_samples;
+   enc->enc_pic.log2_diff_max_min_luma_coding_block_size =
+      pic->seq.log2_diff_max_min_luma_coding_block_size;
+   enc->enc_pic.log2_min_transform_block_size_minus2 =
+      pic->seq.log2_min_transform_block_size_minus2;
+   enc->enc_pic.log2_diff_max_min_transform_block_size =
+      pic->seq.log2_diff_max_min_transform_block_size;
+   enc->enc_pic.max_transform_hierarchy_depth_inter =
+      pic->seq.max_transform_hierarchy_depth_inter;
+   enc->enc_pic.max_transform_hierarchy_depth_intra =
+      pic->seq.max_transform_hierarchy_depth_intra;
+   enc->enc_pic.log2_parallel_merge_level_minus2 =
+      pic->pic.log2_parallel_merge_level_minus2;
+   enc->enc_pic.bit_depth_luma_minus8 = pic->seq.bit_depth_luma_minus8;
+   enc->enc_pic.bit_depth_chroma_minus8 = pic->seq.bit_depth_chroma_minus8;
+   enc->enc_pic.nal_unit_type = pic->pic.nal_unit_type;
+   enc->enc_pic.max_num_merge_cand = pic->slice.max_num_merge_cand;
+   enc->enc_pic.sample_adaptive_offset_enabled_flag =
+      pic->seq.sample_adaptive_offset_enabled_flag;
+   enc->enc_pic.pcm_enabled_flag = pic->seq.pcm_enabled_flag;
+   enc->enc_pic.sps_temporal_mvp_enabled_flag =
+      pic->seq.sps_temporal_mvp_enabled_flag;
+}
+
+/**
+ * Submit the encoder's command stream with PIPE_FLUSH_ASYNC; no fence
+ * is requested.
+ */
+static void
+flush(struct radeon_uvd_encoder *enc)
+{
+   struct radeon_winsys *ws = enc->ws;
+
+   ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL);
+}
+
+/**
+ * pipe_video_codec::flush hook: submit whatever is in the command stream.
+ */
+static void
+radeon_uvd_enc_flush(struct pipe_video_codec *encoder)
+{
+   flush((struct radeon_uvd_encoder *) encoder);
+}
+
+/**
+ * Flush callback handed to ws->cs_create() in radeon_uvd_create_encoder().
+ * It does nothing; all three arguments are unused.
+ */
+static void
+radeon_uvd_enc_cs_flush(void *ctx, unsigned flags,
+                        struct pipe_fence_handle **fence)
+{
+   /* Intentionally empty: the request is ignored. */
+}
+
+/**
+ * Compute how many reference pictures the CPB buffer is sized for.
+ *
+ * A per-level budget is divided by the picture size in 16x16 blocks and
+ * the result is capped at 16.  Levels that are not listed, as well as
+ * 51 and 52, use the largest budget.
+ *
+ * NOTE(review): the budget values were questioned on the list as being
+ * the H.264 ones rather than the H.265 ones - confirm before relying on
+ * them.
+ */
+static unsigned
+get_cpb_num(struct radeon_uvd_encoder *enc)
+{
+   static const struct {
+      unsigned level;
+      unsigned dpb;
+   } dpb_by_level[] = {
+      { 10, 396 },
+      { 11, 900 },
+      { 12, 2376 },
+      { 13, 2376 },
+      { 20, 2376 },
+      { 21, 4752 },
+      { 22, 8100 },
+      { 30, 8100 },
+      { 31, 18000 },
+      { 32, 20480 },
+      { 40, 32768 },
+      { 41, 32768 },
+      { 42, 34816 },
+      { 50, 110400 },
+   };
+   const unsigned num_levels = sizeof(dpb_by_level) / sizeof(dpb_by_level[0]);
+   unsigned blocks_w = align(enc->base.width, 16) / 16;
+   unsigned blocks_h = align(enc->base.height, 16) / 16;
+   unsigned dpb = 184320; /* levels 51, 52 and every unlisted level */
+   unsigned idx;
+
+   for (idx = 0; idx < num_levels; idx++) {
+      if (dpb_by_level[idx].level == enc->base.level) {
+         dpb = dpb_by_level[idx].dpb;
+         break;
+      }
+   }
+
+   return MIN2(dpb / (blocks_w * blocks_h), 16);
+}
+
+/**
+ * pipe_video_codec::begin_frame hook.
+ *
+ * Copies the picture parameters, looks up the luma/chroma surfaces of
+ * the source buffer and, on the first frame (no stream handle yet),
+ * allocates the session buffer and submits the begin task.
+ *
+ * If the session buffer or the temporary feedback buffer cannot be
+ * allocated, an error is logged and the function returns without
+ * opening the stream; stream_handle stays 0 so a later call retries.
+ *
+ * \param encoder  the encoder (a struct radeon_uvd_encoder)
+ * \param source   source picture (a struct vl_video_buffer)
+ * \param picture  picture description (a pipe_h265_enc_picture_desc)
+ */
+static void
+radeon_uvd_enc_begin_frame(struct pipe_video_codec *encoder,
+                           struct pipe_video_buffer *source,
+                           struct pipe_picture_desc *picture)
+{
+   struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+   struct vl_video_buffer *vid_buf = (struct vl_video_buffer *) source;
+
+   radeon_uvd_enc_get_param(enc,
+                            (struct pipe_h265_enc_picture_desc *) picture);
+
+   enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
+   enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
+
+   enc->need_feedback = false;
+
+   if (!enc->stream_handle) {
+      struct rvid_buffer fb;
+
+      enc->si = CALLOC_STRUCT(rvid_buffer);
+      if (!enc->si) {
+         RVID_ERR("Can't create session buffer.\n");
+         return;
+      }
+
+      if (!si_vid_create_buffer(enc->screen, enc->si, 128 * 1024,
+                                PIPE_USAGE_STAGING)) {
+         RVID_ERR("Can't create session buffer.\n");
+         FREE(enc->si);
+         enc->si = NULL;
+         return;
+      }
+
+      if (!si_vid_create_buffer(enc->screen, &fb, 4096, PIPE_USAGE_STAGING)) {
+         RVID_ERR("Can't create feedback buffer.\n");
+         si_vid_destroy_buffer(enc->si);
+         FREE(enc->si);
+         enc->si = NULL;
+         return;
+      }
+
+      /* Only mark the stream as open once every buffer exists. */
+      enc->stream_handle = si_vid_alloc_stream_handle();
+      enc->fb = &fb;
+      enc->begin(enc, picture);
+      flush(enc);
+      si_vid_destroy_buffer(&fb);
+
+      /* fb lives on this stack frame; do not leave a dangling pointer. */
+      enc->fb = NULL;
+   }
+}
+
+/**
+ * pipe_video_codec::encode_bitstream hook.
+ *
+ * Records the destination bitstream buffer, allocates a feedback buffer
+ * for this picture and emits the encode task.
+ *
+ * \param encoder      the encoder (a struct radeon_uvd_encoder)
+ * \param source       unused here
+ * \param destination  resource receiving the bitstream; its width0 is
+ *                     taken as the bitstream size
+ * \param fb           receives the feedback handle that is later passed
+ *                     to get_feedback(); set to NULL when the handle
+ *                     itself cannot be allocated
+ */
+static void
+radeon_uvd_enc_encode_bitstream(struct pipe_video_codec *encoder,
+                                struct pipe_video_buffer *source,
+                                struct pipe_resource *destination, void **fb)
+{
+   struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+
+   enc->get_buffer(destination, &enc->bs_handle, NULL);
+   enc->bs_size = destination->width0;
+
+   *fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
+
+   /* si_vid_create_buffer() must not be handed a NULL buffer struct. */
+   if (!enc->fb) {
+      RVID_ERR("Can't create feedback buffer.\n");
+      return;
+   }
+
+   if (!si_vid_create_buffer(enc->screen, enc->fb, 4096, PIPE_USAGE_STAGING)) {
+      /* The handle stays with the caller, which releases it through
+       * get_feedback(). */
+      RVID_ERR("Can't create feedback buffer.\n");
+      return;
+   }
+
+   enc->need_feedback = true;
+   enc->encode(enc);
+}
+
+/**
+ * pipe_video_codec::end_frame hook: submit the command stream built for
+ * this frame.  The source and picture arguments are not used.
+ */
+static void
+radeon_uvd_enc_end_frame(struct pipe_video_codec *encoder,
+                         struct pipe_video_buffer *source,
+                         struct pipe_picture_desc *picture)
+{
+   flush((struct radeon_uvd_encoder *) encoder);
+}
+
+/**
+ * pipe_video_codec::destroy hook.
+ *
+ * If a stream was opened, submits the destroy task (using a temporary
+ * feedback buffer) and releases the session buffer allocated in
+ * radeon_uvd_enc_begin_frame().  Then frees the CPB buffer, the command
+ * stream and the encoder itself.
+ */
+static void
+radeon_uvd_enc_destroy(struct pipe_video_codec *encoder)
+{
+   struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+
+   if (enc->stream_handle) {
+      struct rvid_buffer fb;
+
+      enc->need_feedback = false;
+      si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
+      enc->fb = &fb;
+      enc->destroy(enc);
+      flush(enc);
+
+      /* enc->si is heap-allocated together with the stream handle in
+       * begin_frame; release both the buffer and the struct. */
+      if (enc->si) {
+         si_vid_destroy_buffer(enc->si);
+         FREE(enc->si);
+         enc->si = NULL;
+      }
+
+      si_vid_destroy_buffer(&fb);
+   }
+
+   si_vid_destroy_buffer(&enc->cpb);
+   enc->ws->cs_destroy(enc->cs);
+   FREE(enc);
+}
+
+/**
+ * pipe_video_codec::get_feedback hook.
+ *
+ * Reads the encoded size from the feedback buffer and then destroys and
+ * frees that buffer.
+ *
+ * \param encoder   the encoder (a struct radeon_uvd_encoder)
+ * \param feedback  handle returned by encode_bitstream (a struct
+ *                  rvid_buffer); always released by this call
+ * \param size      if non-NULL, receives bitstream_size when the
+ *                  feedback status is 0, and 0 when the status is
+ *                  non-zero or the buffer cannot be mapped
+ */
+static void
+radeon_uvd_enc_get_feedback(struct pipe_video_codec *encoder,
+                            void *feedback, unsigned *size)
+{
+   struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+   struct rvid_buffer *fb = feedback;
+
+   if (NULL != size) {
+      radeon_uvd_enc_feedback_t *fb_data =
+         (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map(fb->res->buf,
+                                                           enc->cs,
+                                                           PIPE_TRANSFER_READ_WRITE);
+
+      if (!fb_data) {
+         /* Mapping failed: report an empty bitstream instead of
+          * dereferencing NULL. */
+         RVID_ERR("Can't map feedback buffer.\n");
+         *size = 0;
+      } else {
+         if (!fb_data->status)
+            *size = fb_data->bitstream_size;
+         else
+            *size = 0;
+         enc->ws->buffer_unmap(fb->res->buf);
+      }
+   }
+
+   si_vid_destroy_buffer(fb);
+   FREE(fb);
+}
+
+/**
+ * Create a UVD HEVC encoder.
+ *
+ * Allocates the encoder object, creates its command stream on the
+ * RING_UVD_ENC ring, sizes and allocates the CPB buffer from a temporary
+ * NV12 video buffer of the requested dimensions, and installs the
+ * hardware callbacks.
+ *
+ * \param context     pipe context the encoder belongs to
+ * \param templ       template supplying width, height, level, ...
+ * \param ws          winsys used for command submission and mapping
+ * \param get_buffer  callback resolving a pipe_resource to its winsys
+ *                    buffer handle and surface
+ * \return the new codec, or NULL if encoding is unsupported or any
+ *         allocation fails
+ */
+struct pipe_video_codec *
+radeon_uvd_create_encoder(struct pipe_context *context,
+                          const struct pipe_video_codec *templ,
+                          struct radeon_winsys *ws,
+                          radeon_uvd_enc_get_buffer get_buffer)
+{
+   struct si_screen *sscreen = (struct si_screen *) context->screen;
+   struct r600_common_context *rctx = (struct r600_common_context *) context;
+   struct radeon_uvd_encoder *enc;
+   struct pipe_video_buffer *tmp_buf, templat = { };
+   struct radeon_surf *tmp_surf;
+   unsigned cpb_size;
+
+   if (!si_radeon_uvd_enc_supported(sscreen)) {
+      RVID_ERR("Unsupported UVD ENC fw version loaded!\n");
+      return NULL;
+   }
+
+   enc = CALLOC_STRUCT(radeon_uvd_encoder);
+
+   if (!enc)
+      return NULL;
+
+   enc->base = *templ;
+   enc->base.context = context;
+   enc->base.destroy = radeon_uvd_enc_destroy;
+   enc->base.begin_frame = radeon_uvd_enc_begin_frame;
+   enc->base.encode_bitstream = radeon_uvd_enc_encode_bitstream;
+   enc->base.end_frame = radeon_uvd_enc_end_frame;
+   enc->base.flush = radeon_uvd_enc_flush;
+   enc->base.get_feedback = radeon_uvd_enc_get_feedback;
+   enc->get_buffer = get_buffer;
+   enc->bits_in_shifter = 0;
+   enc->screen = context->screen;
+   enc->ws = ws;
+   enc->cs =
+      ws->cs_create(rctx->ctx, RING_UVD_ENC, radeon_uvd_enc_cs_flush, enc);
+
+   if (!enc->cs) {
+      RVID_ERR("Can't get command submission context.\n");
+      goto error;
+   }
+
+   /* enc->si stays NULL here: the session buffer is allocated by
+    * radeon_uvd_enc_begin_frame() when the stream is opened.  It must
+    * never point at a local of this function, which would dangle as
+    * soon as the encoder is returned. */
+
+   templat.buffer_format = PIPE_FORMAT_NV12;
+   templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
+   templat.width = enc->base.width;
+   templat.height = enc->base.height;
+   templat.interlaced = false;
+
+   if (!(tmp_buf = context->create_video_buffer(context, &templat))) {
+      RVID_ERR("Can't create video buffer.\n");
+      goto error;
+   }
+
+   enc->cpb_num = get_cpb_num(enc);
+
+   if (!enc->cpb_num) {
+      /* The temporary buffer is not owned by enc; release it here. */
+      tmp_buf->destroy(tmp_buf);
+      goto error;
+   }
+
+   get_buffer(((struct vl_video_buffer *) tmp_buf)->resources[0], NULL,
+              &tmp_surf);
+
+   /* Luma plane size with the pitch/height alignment used per chip
+    * generation. */
+   cpb_size = (sscreen->info.chip_class < GFX9) ?
+      align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) *
+      align(tmp_surf->u.legacy.level[0].nblk_y, 32) :
+      align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) *
+      align(tmp_surf->u.gfx9.surf_height, 32);
+
+   /* 3/2 of the luma size per picture (NV12), times the picture count. */
+   cpb_size = cpb_size * 3 / 2;
+   cpb_size = cpb_size * enc->cpb_num;
+   tmp_buf->destroy(tmp_buf);
+
+   if (!si_vid_create_buffer
+       (enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
+      RVID_ERR("Can't create CPB buffer.\n");
+      goto error;
+   }
+
+   radeon_uvd_enc_1_1_init(enc);
+
+   return &enc->base;
+
+error:
+   if (enc->cs)
+      enc->ws->cs_destroy(enc->cs);
+
+   si_vid_destroy_buffer(&enc->cpb);
+
+   FREE(enc);
+   return NULL;
+}
+
+/**
+ * Report whether the screen's GPU info has uvd_enc_supported set.
+ */
+bool
+si_radeon_uvd_enc_supported(struct si_screen * sscreen)
+{
+   return sscreen->info.uvd_enc_supported;
+}
--
2.7.4
Boyuan Zhang
2018-02-07 22:43:01 UTC
Permalink
Better to add it to Makefile.source and Meson in this patch. Other than
this,
Post by James Zhu
Implement UVD hevc encode functions
---
src/gallium/drivers/radeon/radeon_uvd_enc.c | 370 ++++++++++++++++++++++++++++
1 file changed, 370 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c
diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c b/src/gallium/drivers/radeon/radeon_uvd_enc.c
new file mode 100644
index 0000000..f162589
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c
@@ -0,0 +1,370 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+
+#include "radeonsi/si_pipe.h"
+#include "radeon_video.h"
+#include "radeon_uvd_enc.h"
+
+static void
+radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc,
+ struct pipe_h265_enc_picture_desc *pic)
+{
+ enc->enc_pic.picture_type = pic->picture_type;
+ enc->enc_pic.frame_num = pic->frame_num;
+ enc->enc_pic.pic_order_cnt = pic->pic_order_cnt;
+ enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type;
+ enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0;
+ enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1;
+ enc->enc_pic.not_referenced = pic->not_referenced;
+ enc->enc_pic.is_idr = (pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_IDR)
+ || (pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_I);
+ enc->enc_pic.crop_left = 0;
+ enc->enc_pic.crop_right =
+ (align(enc->base.width, 16) - enc->base.width) / 2;
+ enc->enc_pic.crop_top = 0;
+ enc->enc_pic.crop_bottom =
+ (align(enc->base.height, 16) - enc->base.height) / 2;
+ enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag;
+ enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc;
+ enc->enc_pic.general_level_idc = pic->seq.general_level_idc;
+ enc->enc_pic.max_poc = pic->seq.intra_period;
+ enc->enc_pic.log2_max_poc = 0;
+ for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++)
+ i = (i >> 1);
+ enc->enc_pic.chroma_format_idc = pic->seq.chroma_format_idc;
+ enc->enc_pic.pic_width_in_luma_samples =
+ pic->seq.pic_width_in_luma_samples;
+ enc->enc_pic.pic_height_in_luma_samples =
+ pic->seq.pic_height_in_luma_samples;
+ enc->enc_pic.log2_diff_max_min_luma_coding_block_size =
+ pic->seq.log2_diff_max_min_luma_coding_block_size;
+ enc->enc_pic.log2_min_transform_block_size_minus2 =
+ pic->seq.log2_min_transform_block_size_minus2;
+ enc->enc_pic.log2_diff_max_min_transform_block_size =
+ pic->seq.log2_diff_max_min_transform_block_size;
+ enc->enc_pic.max_transform_hierarchy_depth_inter =
+ pic->seq.max_transform_hierarchy_depth_inter;
+ enc->enc_pic.max_transform_hierarchy_depth_intra =
+ pic->seq.max_transform_hierarchy_depth_intra;
+ enc->enc_pic.log2_parallel_merge_level_minus2 =
+ pic->pic.log2_parallel_merge_level_minus2;
+ enc->enc_pic.bit_depth_luma_minus8 = pic->seq.bit_depth_luma_minus8;
+ enc->enc_pic.bit_depth_chroma_minus8 = pic->seq.bit_depth_chroma_minus8;
+ enc->enc_pic.nal_unit_type = pic->pic.nal_unit_type;
+ enc->enc_pic.max_num_merge_cand = pic->slice.max_num_merge_cand;
+ enc->enc_pic.sample_adaptive_offset_enabled_flag =
+ pic->seq.sample_adaptive_offset_enabled_flag;
+ enc->enc_pic.pcm_enabled_flag = pic->seq.pcm_enabled_flag;
+ enc->enc_pic.sps_temporal_mvp_enabled_flag =
+ pic->seq.sps_temporal_mvp_enabled_flag;
+}
+
+static void
+flush(struct radeon_uvd_encoder *enc)
+{
+ enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL);
+}
+
+static void
+radeon_uvd_enc_flush(struct pipe_video_codec *encoder)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+ flush(enc);
+}
+
+static void
+radeon_uvd_enc_cs_flush(void *ctx, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ // just ignored
+}
+
+static unsigned
+get_cpb_num(struct radeon_uvd_encoder *enc)
+{
+ unsigned w = align(enc->base.width, 16) / 16;
+ unsigned h = align(enc->base.height, 16) / 16;
+ unsigned dpb;
+
+ switch (enc->base.level) {
+ dpb = 396;
+ break;
+ dpb = 900;
+ break;
+ dpb = 2376;
+ break;
+ dpb = 4752;
+ break;
+ dpb = 8100;
+ break;
+ dpb = 18000;
+ break;
+ dpb = 20480;
+ break;
+ dpb = 32768;
+ break;
+ dpb = 34816;
+ break;
+ dpb = 110400;
+ break;
+ dpb = 184320;
+ break;
+ }
+
+ return MIN2(dpb / (w * h), 16);
+}
+
+static void
+radeon_uvd_enc_begin_frame(struct pipe_video_codec *encoder,
+ struct pipe_video_buffer *source,
+ struct pipe_picture_desc *picture)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+ struct vl_video_buffer *vid_buf = (struct vl_video_buffer *) source;
+
+ radeon_uvd_enc_get_param(enc,
+ (struct pipe_h265_enc_picture_desc *) picture);
+
+ enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
+ enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
+
+ enc->need_feedback = false;
+
+ if (!enc->stream_handle) {
+ struct rvid_buffer fb;
+ enc->stream_handle = si_vid_alloc_stream_handle();
+ enc->si = CALLOC_STRUCT(rvid_buffer);
+ si_vid_create_buffer(enc->screen, enc->si, 128 * 1024,
+ PIPE_USAGE_STAGING);
+ si_vid_create_buffer(enc->screen, &fb, 4096, PIPE_USAGE_STAGING);
+ enc->fb = &fb;
+ enc->begin(enc, picture);
+ flush(enc);
+ si_vid_destroy_buffer(&fb);
+ }
+}
+
+static void
+radeon_uvd_enc_encode_bitstream(struct pipe_video_codec *encoder,
+ struct pipe_video_buffer *source,
+ struct pipe_resource *destination, void **fb)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+ enc->get_buffer(destination, &enc->bs_handle, NULL);
+ enc->bs_size = destination->width0;
+
+ *fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
+
+ if (!si_vid_create_buffer(enc->screen, enc->fb, 4096, PIPE_USAGE_STAGING)) {
+ RVID_ERR("Can't create feedback buffer.\n");
+ return;
+ }
+
+ enc->need_feedback = true;
+ enc->encode(enc);
+}
+
+static void
+radeon_uvd_enc_end_frame(struct pipe_video_codec *encoder,
+ struct pipe_video_buffer *source,
+ struct pipe_picture_desc *picture)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+ flush(enc);
+}
+
+static void
+radeon_uvd_enc_destroy(struct pipe_video_codec *encoder)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+
+ if (enc->stream_handle) {
+ struct rvid_buffer fb;
+ enc->need_feedback = false;
+ si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
+ enc->fb = &fb;
+ enc->destroy(enc);
+ flush(enc);
+ si_vid_destroy_buffer(&fb);
+ }
+
+ si_vid_destroy_buffer(&enc->cpb);
+ enc->ws->cs_destroy(enc->cs);
+ FREE(enc);
+}
+
+static void
+radeon_uvd_enc_get_feedback(struct pipe_video_codec *encoder,
+ void *feedback, unsigned *size)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+ struct rvid_buffer *fb = feedback;
+
+ if (NULL != size) {
+ radeon_uvd_enc_feedback_t *fb_data =
+ (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map(fb->res->buf,
+ enc->cs,
+ PIPE_TRANSFER_READ_WRITE);
+
+ if (!fb_data->status)
+ *size = fb_data->bitstream_size;
+ else
+ *size = 0;
+ enc->ws->buffer_unmap(fb->res->buf);
+ }
+
+ si_vid_destroy_buffer(fb);
+ FREE(fb);
+}
+
+struct pipe_video_codec *
+radeon_uvd_create_encoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ,
+ struct radeon_winsys *ws,
+ radeon_uvd_enc_get_buffer get_buffer)
+{
+ struct si_screen *sscreen = (struct si_screen *) context->screen;
+ struct r600_common_context *rctx = (struct r600_common_context *) context;
+ struct radeon_uvd_encoder *enc;
+ struct pipe_video_buffer *tmp_buf, templat = { };
+ struct radeon_surf *tmp_surf;
+ unsigned cpb_size;
+
+ if (!si_radeon_uvd_enc_supported(sscreen)) {
+ RVID_ERR("Unsupported UVD ENC fw version loaded!\n");
+ return NULL;
+ }
+
+ enc = CALLOC_STRUCT(radeon_uvd_encoder);
+
+ if (!enc)
+ return NULL;
+
+ enc->base = *templ;
+ enc->base.context = context;
+ enc->base.destroy = radeon_uvd_enc_destroy;
+ enc->base.begin_frame = radeon_uvd_enc_begin_frame;
+ enc->base.encode_bitstream = radeon_uvd_enc_encode_bitstream;
+ enc->base.end_frame = radeon_uvd_enc_end_frame;
+ enc->base.flush = radeon_uvd_enc_flush;
+ enc->base.get_feedback = radeon_uvd_enc_get_feedback;
+ enc->get_buffer = get_buffer;
+ enc->bits_in_shifter = 0;
+ enc->screen = context->screen;
+ enc->ws = ws;
+ enc->cs =
+ ws->cs_create(rctx->ctx, RING_UVD_ENC, radeon_uvd_enc_cs_flush, enc);
+
+ if (!enc->cs) {
+ RVID_ERR("Can't get command submission context.\n");
+ goto error;
+ }
+
+ struct rvid_buffer si;
+ si_vid_create_buffer(enc->screen, &si, 128 * 1024, PIPE_USAGE_STAGING);
+ enc->si = &si;
+
+ templat.buffer_format = PIPE_FORMAT_NV12;
+ templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
+ templat.width = enc->base.width;
+ templat.height = enc->base.height;
+ templat.interlaced = false;
+
+ if (!(tmp_buf = context->create_video_buffer(context, &templat))) {
+ RVID_ERR("Can't create video buffer.\n");
+ goto error;
+ }
+
+ enc->cpb_num = get_cpb_num(enc);
+
+ if (!enc->cpb_num)
+ goto error;
+
+ get_buffer(((struct vl_video_buffer *) tmp_buf)->resources[0], NULL,
+ &tmp_surf);
+
+ cpb_size = (sscreen->info.chip_class < GFX9) ?
+ align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) *
+ align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) *
+ align(tmp_surf->u.gfx9.surf_height, 32);
+
+ cpb_size = cpb_size * 3 / 2;
+ cpb_size = cpb_size * enc->cpb_num;
+ tmp_buf->destroy(tmp_buf);
+
+ if (!si_vid_create_buffer
+ (enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
+ RVID_ERR("Can't create CPB buffer.\n");
+ goto error;
+ }
+
+ radeon_uvd_enc_1_1_init(enc);
+
+ return &enc->base;
+
+ if (enc->cs)
+ enc->ws->cs_destroy(enc->cs);
+
+ si_vid_destroy_buffer(&enc->cpb);
+
+ FREE(enc);
+ return NULL;
+}
+
+bool
+si_radeon_uvd_enc_supported(struct si_screen * sscreen)
+{
+ return (sscreen->info.uvd_enc_supported);
+}
Mark Thompson
2018-02-08 22:23:01 UTC
Permalink
Post by James Zhu
Implement UVD hevc encode functions
---
src/gallium/drivers/radeon/radeon_uvd_enc.c | 370 ++++++++++++++++++++++++++++
1 file changed, 370 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c
diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c b/src/gallium/drivers/radeon/radeon_uvd_enc.c
new file mode 100644
index 0000000..f162589
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c
@@ -0,0 +1,370 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+
+#include "radeonsi/si_pipe.h"
+#include "radeon_video.h"
+#include "radeon_uvd_enc.h"
+
+static void
+radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc,
+ struct pipe_h265_enc_picture_desc *pic)
+{
+ enc->enc_pic.picture_type = pic->picture_type;
+ enc->enc_pic.frame_num = pic->frame_num;
+ enc->enc_pic.pic_order_cnt = pic->pic_order_cnt;
+ enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type;
+ enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0;
+ enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1;
+ enc->enc_pic.not_referenced = pic->not_referenced;
+ enc->enc_pic.is_idr = (pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_IDR)
+ || (pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_I);
Looks very suspicious? I would expect that only IDR frames would be IDR.
Post by James Zhu
+ enc->enc_pic.crop_left = 0;
+ enc->enc_pic.crop_right =
+ (align(enc->base.width, 16) - enc->base.width) / 2;
+ enc->enc_pic.crop_top = 0;
+ enc->enc_pic.crop_bottom =
+ (align(enc->base.height, 16) - enc->base.height) / 2;
+ enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag;
+ enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc;
+ enc->enc_pic.general_level_idc = pic->seq.general_level_idc;
+ enc->enc_pic.max_poc = pic->seq.intra_period;
+ enc->enc_pic.log2_max_poc = 0;
+ for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++)
+ i = (i >> 1);
+ enc->enc_pic.chroma_format_idc = pic->seq.chroma_format_idc;
+ enc->enc_pic.pic_width_in_luma_samples =
+ pic->seq.pic_width_in_luma_samples;
+ enc->enc_pic.pic_height_in_luma_samples =
+ pic->seq.pic_height_in_luma_samples;
+ enc->enc_pic.log2_diff_max_min_luma_coding_block_size =
+ pic->seq.log2_diff_max_min_luma_coding_block_size;
+ enc->enc_pic.log2_min_transform_block_size_minus2 =
+ pic->seq.log2_min_transform_block_size_minus2;
+ enc->enc_pic.log2_diff_max_min_transform_block_size =
+ pic->seq.log2_diff_max_min_transform_block_size;
+ enc->enc_pic.max_transform_hierarchy_depth_inter =
+ pic->seq.max_transform_hierarchy_depth_inter;
+ enc->enc_pic.max_transform_hierarchy_depth_intra =
+ pic->seq.max_transform_hierarchy_depth_intra;
+ enc->enc_pic.log2_parallel_merge_level_minus2 =
+ pic->pic.log2_parallel_merge_level_minus2;
+ enc->enc_pic.bit_depth_luma_minus8 = pic->seq.bit_depth_luma_minus8;
+ enc->enc_pic.bit_depth_chroma_minus8 = pic->seq.bit_depth_chroma_minus8;
+ enc->enc_pic.nal_unit_type = pic->pic.nal_unit_type;
+ enc->enc_pic.max_num_merge_cand = pic->slice.max_num_merge_cand;
+ enc->enc_pic.sample_adaptive_offset_enabled_flag =
+ pic->seq.sample_adaptive_offset_enabled_flag;
+ enc->enc_pic.pcm_enabled_flag = pic->seq.pcm_enabled_flag;
+ enc->enc_pic.sps_temporal_mvp_enabled_flag =
+ pic->seq.sps_temporal_mvp_enabled_flag;
+}
+
+static void
+flush(struct radeon_uvd_encoder *enc)
+{
+ enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL);
+}
+
+static void
+radeon_uvd_enc_flush(struct pipe_video_codec *encoder)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+ flush(enc);
+}
+
+static void
+radeon_uvd_enc_cs_flush(void *ctx, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ // just ignored
+}
+
+static unsigned
+get_cpb_num(struct radeon_uvd_encoder *enc)
+{
+ unsigned w = align(enc->base.width, 16) / 16;
+ unsigned h = align(enc->base.height, 16) / 16;
+ unsigned dpb;
+
+ switch (enc->base.level) {
+ dpb = 396;
+ break;
+ dpb = 900;
+ break;
+ dpb = 2376;
+ break;
+ dpb = 4752;
+ break;
+ dpb = 8100;
+ break;
+ dpb = 18000;
+ break;
+ dpb = 20480;
+ break;
+ dpb = 32768;
+ break;
+ dpb = 34816;
+ break;
+ dpb = 110400;
+ break;
+ dpb = 184320;
+ break;
This appears to be copied from H.264 - the H.265 values are not the same.

Also, there are levels 6, 6.1 and 6.2.
Post by James Zhu
+ }
+
+ return MIN2(dpb / (w * h), 16);
+}
+
...
James Zhu
2018-02-09 20:37:47 UTC
Permalink
Hi Mark,

Thanks for pointing them out. [PATCH v3 3/8] / [PATCH v3 4/8] / [PATCH v3
5/8] are updated accordingly.

James.
Post by Mark Thompson
Post by James Zhu
Implement UVD hevc encode functions
---
src/gallium/drivers/radeon/radeon_uvd_enc.c | 370 ++++++++++++++++++++++++++++
1 file changed, 370 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c
diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c b/src/gallium/drivers/radeon/radeon_uvd_enc.c
new file mode 100644
index 0000000..f162589
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c
@@ -0,0 +1,370 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+
+#include "radeonsi/si_pipe.h"
+#include "radeon_video.h"
+#include "radeon_uvd_enc.h"
+
+static void
+radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc,
+ struct pipe_h265_enc_picture_desc *pic)
+{
+ enc->enc_pic.picture_type = pic->picture_type;
+ enc->enc_pic.frame_num = pic->frame_num;
+ enc->enc_pic.pic_order_cnt = pic->pic_order_cnt;
+ enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type;
+ enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0;
+ enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1;
+ enc->enc_pic.not_referenced = pic->not_referenced;
+ enc->enc_pic.is_idr = (pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_IDR)
+ || (pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_I);
Looks very suspicious? I would expect that only IDR frames would be IDR.
Post by James Zhu
+ enc->enc_pic.crop_left = 0;
+ enc->enc_pic.crop_right =
+ (align(enc->base.width, 16) - enc->base.width) / 2;
+ enc->enc_pic.crop_top = 0;
+ enc->enc_pic.crop_bottom =
+ (align(enc->base.height, 16) - enc->base.height) / 2;
+ enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag;
+ enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc;
+ enc->enc_pic.general_level_idc = pic->seq.general_level_idc;
+ enc->enc_pic.max_poc = pic->seq.intra_period;
+ enc->enc_pic.log2_max_poc = 0;
+ for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++)
+ i = (i >> 1);
+ enc->enc_pic.chroma_format_idc = pic->seq.chroma_format_idc;
+ enc->enc_pic.pic_width_in_luma_samples =
+ pic->seq.pic_width_in_luma_samples;
+ enc->enc_pic.pic_height_in_luma_samples =
+ pic->seq.pic_height_in_luma_samples;
+ enc->enc_pic.log2_diff_max_min_luma_coding_block_size =
+ pic->seq.log2_diff_max_min_luma_coding_block_size;
+ enc->enc_pic.log2_min_transform_block_size_minus2 =
+ pic->seq.log2_min_transform_block_size_minus2;
+ enc->enc_pic.log2_diff_max_min_transform_block_size =
+ pic->seq.log2_diff_max_min_transform_block_size;
+ enc->enc_pic.max_transform_hierarchy_depth_inter =
+ pic->seq.max_transform_hierarchy_depth_inter;
+ enc->enc_pic.max_transform_hierarchy_depth_intra =
+ pic->seq.max_transform_hierarchy_depth_intra;
+ enc->enc_pic.log2_parallel_merge_level_minus2 =
+ pic->pic.log2_parallel_merge_level_minus2;
+ enc->enc_pic.bit_depth_luma_minus8 = pic->seq.bit_depth_luma_minus8;
+ enc->enc_pic.bit_depth_chroma_minus8 = pic->seq.bit_depth_chroma_minus8;
+ enc->enc_pic.nal_unit_type = pic->pic.nal_unit_type;
+ enc->enc_pic.max_num_merge_cand = pic->slice.max_num_merge_cand;
+ enc->enc_pic.sample_adaptive_offset_enabled_flag =
+ pic->seq.sample_adaptive_offset_enabled_flag;
+ enc->enc_pic.pcm_enabled_flag = pic->seq.pcm_enabled_flag;
+ enc->enc_pic.sps_temporal_mvp_enabled_flag =
+ pic->seq.sps_temporal_mvp_enabled_flag;
+}
+
+static void
+flush(struct radeon_uvd_encoder *enc)
+{
+ enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL);
+}
+
+static void
+radeon_uvd_enc_flush(struct pipe_video_codec *encoder)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+ flush(enc);
+}
+
+static void
+radeon_uvd_enc_cs_flush(void *ctx, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ // just ignored
+}
+
+static unsigned
+get_cpb_num(struct radeon_uvd_encoder *enc)
+{
+ unsigned w = align(enc->base.width, 16) / 16;
+ unsigned h = align(enc->base.height, 16) / 16;
+ unsigned dpb;
+
+ switch (enc->base.level) {
+ dpb = 396;
+ break;
+ dpb = 900;
+ break;
+ dpb = 2376;
+ break;
+ dpb = 4752;
+ break;
+ dpb = 8100;
+ break;
+ dpb = 18000;
+ break;
+ dpb = 20480;
+ break;
+ dpb = 32768;
+ break;
+ dpb = 34816;
+ break;
+ dpb = 110400;
+ break;
+ dpb = 184320;
+ break;
This appears to be copied from H.264 - the H.265 values are not the same.
Also, there are levels 6, 6.1 and 6.2.
Post by James Zhu
+ }
+
+ return MIN2(dpb / (w * h), 16);
+}
+
...
James Zhu
2018-02-09 20:35:21 UTC
Permalink
Implement UVD hevc encode functions

Signed-off-by: James Zhu <***@amd.com>
---
src/gallium/drivers/radeon/radeon_uvd_enc.c | 381 ++++++++++++++++++++++++++++
1 file changed, 381 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c

diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c b/src/gallium/drivers/radeon/radeon_uvd_enc.c
new file mode 100644
index 0000000..ad8b951
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c
@@ -0,0 +1,381 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+
+#include "radeonsi/si_pipe.h"
+#include "radeon_video.h"
+#include "radeon_uvd_enc.h"
+
+#define UVD_HEVC_LEVEL_1 30
+#define UVD_HEVC_LEVEL_2 60
+#define UVD_HEVC_LEVEL_2_1 63
+#define UVD_HEVC_LEVEL_3 90
+#define UVD_HEVC_LEVEL_3_1 93
+#define UVD_HEVC_LEVEL_4 120
+#define UVD_HEVC_LEVEL_4_1 123
+#define UVD_HEVC_LEVEL_5 150
+#define UVD_HEVC_LEVEL_5_1 153
+#define UVD_HEVC_LEVEL_5_2 156
+#define UVD_HEVC_LEVEL_6 180
+#define UVD_HEVC_LEVEL_6_1 183
+#define UVD_HEVC_LEVEL_6_2 186
+
+static void
+radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc,
+ struct pipe_h265_enc_picture_desc *pic)
+{
+ enc->enc_pic.picture_type = pic->picture_type;
+ enc->enc_pic.frame_num = pic->frame_num;
+ enc->enc_pic.pic_order_cnt = pic->pic_order_cnt;
+ enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type;
+ enc->enc_pic.not_referenced = pic->not_referenced;
+ enc->enc_pic.is_iframe =
+ (pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_IDR)
+ || (pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_I);
+ enc->enc_pic.crop_left = 0;
+ enc->enc_pic.crop_right =
+ (align(enc->base.width, 16) - enc->base.width) / 2;
+ enc->enc_pic.crop_top = 0;
+ enc->enc_pic.crop_bottom =
+ (align(enc->base.height, 16) - enc->base.height) / 2;
+ enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag;
+ enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc;
+ enc->enc_pic.general_level_idc = pic->seq.general_level_idc;
+ enc->enc_pic.max_poc = pic->seq.intra_period;
+ enc->enc_pic.log2_max_poc = 0;
+ for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++)
+ i = (i >> 1);
+ enc->enc_pic.chroma_format_idc = pic->seq.chroma_format_idc;
+ enc->enc_pic.pic_width_in_luma_samples =
+ pic->seq.pic_width_in_luma_samples;
+ enc->enc_pic.pic_height_in_luma_samples =
+ pic->seq.pic_height_in_luma_samples;
+ enc->enc_pic.log2_diff_max_min_luma_coding_block_size =
+ pic->seq.log2_diff_max_min_luma_coding_block_size;
+ enc->enc_pic.log2_min_transform_block_size_minus2 =
+ pic->seq.log2_min_transform_block_size_minus2;
+ enc->enc_pic.log2_diff_max_min_transform_block_size =
+ pic->seq.log2_diff_max_min_transform_block_size;
+ enc->enc_pic.max_transform_hierarchy_depth_inter =
+ pic->seq.max_transform_hierarchy_depth_inter;
+ enc->enc_pic.max_transform_hierarchy_depth_intra =
+ pic->seq.max_transform_hierarchy_depth_intra;
+ enc->enc_pic.log2_parallel_merge_level_minus2 =
+ pic->pic.log2_parallel_merge_level_minus2;
+ enc->enc_pic.bit_depth_luma_minus8 = pic->seq.bit_depth_luma_minus8;
+ enc->enc_pic.bit_depth_chroma_minus8 = pic->seq.bit_depth_chroma_minus8;
+ enc->enc_pic.nal_unit_type = pic->pic.nal_unit_type;
+ enc->enc_pic.max_num_merge_cand = pic->slice.max_num_merge_cand;
+ enc->enc_pic.sample_adaptive_offset_enabled_flag =
+ pic->seq.sample_adaptive_offset_enabled_flag;
+ enc->enc_pic.pcm_enabled_flag = pic->seq.pcm_enabled_flag;
+ enc->enc_pic.sps_temporal_mvp_enabled_flag =
+ pic->seq.sps_temporal_mvp_enabled_flag;
+}
+
+static void
+flush(struct radeon_uvd_encoder *enc)
+{
+ enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL);
+}
+
+static void
+radeon_uvd_enc_flush(struct pipe_video_codec *encoder)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+ flush(enc);
+}
+
+static void
+radeon_uvd_enc_cs_flush(void *ctx, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ // just ignored
+}
+
+static unsigned
+get_cpb_num(struct radeon_uvd_encoder *enc)
+{
+ unsigned w = align(enc->base.width, 16) / 16;
+ unsigned h = align(enc->base.height, 16) / 16;
+ unsigned dpb;
+
+ switch (enc->base.level) {
+ case UVD_HEVC_LEVEL_1:
+ dpb = 36864;
+ break;
+
+ case UVD_HEVC_LEVEL_2:
+ dpb = 122880;
+ break;
+
+ case UVD_HEVC_LEVEL_2_1:
+ dpb = 245760;
+ break;
+
+ case UVD_HEVC_LEVEL_3:
+ dpb = 552960;
+ break;
+
+ case UVD_HEVC_LEVEL_3_1:
+ dpb = 983040;
+ break;
+
+ case UVD_HEVC_LEVEL_4:
+ case UVD_HEVC_LEVEL_4_1:
+ dpb = 2228224;
+ break;
+
+ case UVD_HEVC_LEVEL_5:
+ case UVD_HEVC_LEVEL_5_1:
+ case UVD_HEVC_LEVEL_5_2:
+ dpb = 8912896;
+ break;
+
+ case UVD_HEVC_LEVEL_6:
+ case UVD_HEVC_LEVEL_6_1:
+ case UVD_HEVC_LEVEL_6_2:
+ default:
+ dpb = 35651584;
+ break;
+ }
+
+ return MIN2(dpb / (w * h), 16);
+}
+
+static void
+radeon_uvd_enc_begin_frame(struct pipe_video_codec *encoder,
+ struct pipe_video_buffer *source,
+ struct pipe_picture_desc *picture)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+ struct vl_video_buffer *vid_buf = (struct vl_video_buffer *) source;
+
+ radeon_uvd_enc_get_param(enc,
+ (struct pipe_h265_enc_picture_desc *) picture);
+
+ enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
+ enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
+
+ enc->need_feedback = false;
+
+ if (!enc->stream_handle) {
+ struct rvid_buffer fb;
+ enc->stream_handle = si_vid_alloc_stream_handle();
+ enc->si = CALLOC_STRUCT(rvid_buffer);
+ si_vid_create_buffer(enc->screen, enc->si, 128 * 1024,
+ PIPE_USAGE_STAGING);
+ si_vid_create_buffer(enc->screen, &fb, 4096, PIPE_USAGE_STAGING);
+ enc->fb = &fb;
+ enc->begin(enc, picture);
+ flush(enc);
+ si_vid_destroy_buffer(&fb);
+ }
+}
+
+static void
+radeon_uvd_enc_encode_bitstream(struct pipe_video_codec *encoder,
+ struct pipe_video_buffer *source,
+ struct pipe_resource *destination, void **fb)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+ enc->get_buffer(destination, &enc->bs_handle, NULL);
+ enc->bs_size = destination->width0;
+
+ *fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
+
+ if (!si_vid_create_buffer(enc->screen, enc->fb, 4096, PIPE_USAGE_STAGING)) {
+ RVID_ERR("Can't create feedback buffer.\n");
+ return;
+ }
+
+ enc->need_feedback = true;
+ enc->encode(enc);
+}
+
+static void
+radeon_uvd_enc_end_frame(struct pipe_video_codec *encoder,
+ struct pipe_video_buffer *source,
+ struct pipe_picture_desc *picture)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+ flush(enc);
+}
+
+static void
+radeon_uvd_enc_destroy(struct pipe_video_codec *encoder)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+
+ if (enc->stream_handle) {
+ struct rvid_buffer fb;
+ enc->need_feedback = false;
+ si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
+ enc->fb = &fb;
+ enc->destroy(enc);
+ flush(enc);
+ si_vid_destroy_buffer(&fb);
+ }
+
+ si_vid_destroy_buffer(&enc->cpb);
+ enc->ws->cs_destroy(enc->cs);
+ FREE(enc);
+}
+
+static void
+radeon_uvd_enc_get_feedback(struct pipe_video_codec *encoder,
+ void *feedback, unsigned *size)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+ struct rvid_buffer *fb = feedback;
+
+ if (NULL != size) {
+ radeon_uvd_enc_feedback_t *fb_data =
+ (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map(fb->res->buf,
+ enc->cs,
+ PIPE_TRANSFER_READ_WRITE);
+
+ if (!fb_data->status)
+ *size = fb_data->bitstream_size;
+ else
+ *size = 0;
+ enc->ws->buffer_unmap(fb->res->buf);
+ }
+
+ si_vid_destroy_buffer(fb);
+ FREE(fb);
+}
+
+struct pipe_video_codec *
+radeon_uvd_create_encoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ,
+ struct radeon_winsys *ws,
+ radeon_uvd_enc_get_buffer get_buffer)
+{
+ struct si_screen *sscreen = (struct si_screen *) context->screen;
+ struct r600_common_context *rctx = (struct r600_common_context *) context;
+ struct radeon_uvd_encoder *enc;
+ struct pipe_video_buffer *tmp_buf, templat = { };
+ struct radeon_surf *tmp_surf;
+ unsigned cpb_size;
+
+ if (!si_radeon_uvd_enc_supported(sscreen)) {
+ RVID_ERR("Unsupported UVD ENC fw version loaded!\n");
+ return NULL;
+ }
+
+ enc = CALLOC_STRUCT(radeon_uvd_encoder);
+
+ if (!enc)
+ return NULL;
+
+ enc->base = *templ;
+ enc->base.context = context;
+ enc->base.destroy = radeon_uvd_enc_destroy;
+ enc->base.begin_frame = radeon_uvd_enc_begin_frame;
+ enc->base.encode_bitstream = radeon_uvd_enc_encode_bitstream;
+ enc->base.end_frame = radeon_uvd_enc_end_frame;
+ enc->base.flush = radeon_uvd_enc_flush;
+ enc->base.get_feedback = radeon_uvd_enc_get_feedback;
+ enc->get_buffer = get_buffer;
+ enc->bits_in_shifter = 0;
+ enc->screen = context->screen;
+ enc->ws = ws;
+ enc->cs =
+ ws->cs_create(rctx->ctx, RING_UVD_ENC, radeon_uvd_enc_cs_flush, enc);
+
+ if (!enc->cs) {
+ RVID_ERR("Can't get command submission context.\n");
+ goto error;
+ }
+
+ struct rvid_buffer si;
+ si_vid_create_buffer(enc->screen, &si, 128 * 1024, PIPE_USAGE_STAGING);
+ enc->si = &si;
+
+ templat.buffer_format = PIPE_FORMAT_NV12;
+ templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
+ templat.width = enc->base.width;
+ templat.height = enc->base.height;
+ templat.interlaced = false;
+
+ if (!(tmp_buf = context->create_video_buffer(context, &templat))) {
+ RVID_ERR("Can't create video buffer.\n");
+ goto error;
+ }
+
+ enc->cpb_num = get_cpb_num(enc);
+
+ if (!enc->cpb_num)
+ goto error;
+
+ get_buffer(((struct vl_video_buffer *) tmp_buf)->resources[0], NULL,
+ &tmp_surf);
+
+ cpb_size = (sscreen->info.chip_class < GFX9) ?
+ align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) *
+ align(tmp_surf->u.legacy.level[0].nblk_y, 32) :
+ align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) *
+ align(tmp_surf->u.gfx9.surf_height, 32);
+
+ cpb_size = cpb_size * 3 / 2;
+ cpb_size = cpb_size * enc->cpb_num;
+ tmp_buf->destroy(tmp_buf);
+
+ if (!si_vid_create_buffer
+ (enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
+ RVID_ERR("Can't create CPB buffer.\n");
+ goto error;
+ }
+
+ radeon_uvd_enc_1_1_init(enc);
+
+ return &enc->base;
+
+ error:
+ if (enc->cs)
+ enc->ws->cs_destroy(enc->cs);
+
+ si_vid_destroy_buffer(&enc->cpb);
+
+ FREE(enc);
+ return NULL;
+}
+
+bool
+si_radeon_uvd_enc_supported(struct si_screen * sscreen)
+{
+ return (sscreen->info.uvd_enc_supported);
+}
--
2.7.4
James Zhu
2018-02-12 15:14:39 UTC
Permalink
Implement UVD hevc encode functions

Signed-off-by: James Zhu <***@amd.com>
---
src/gallium/drivers/radeon/radeon_uvd_enc.c | 381 ++++++++++++++++++++++++++++
1 file changed, 381 insertions(+)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c

diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c b/src/gallium/drivers/radeon/radeon_uvd_enc.c
new file mode 100644
index 0000000..94bd26a
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c
@@ -0,0 +1,381 @@
+/**************************************************************************
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+
+#include "radeonsi/si_pipe.h"
+#include "radeon_video.h"
+#include "radeon_uvd_enc.h"
+
+#define UVD_HEVC_LEVEL_1 30
+#define UVD_HEVC_LEVEL_2 60
+#define UVD_HEVC_LEVEL_2_1 63
+#define UVD_HEVC_LEVEL_3 90
+#define UVD_HEVC_LEVEL_3_1 93
+#define UVD_HEVC_LEVEL_4 120
+#define UVD_HEVC_LEVEL_4_1 123
+#define UVD_HEVC_LEVEL_5 150
+#define UVD_HEVC_LEVEL_5_1 153
+#define UVD_HEVC_LEVEL_5_2 156
+#define UVD_HEVC_LEVEL_6 180
+#define UVD_HEVC_LEVEL_6_1 183
+#define UVD_HEVC_LEVEL_6_2 186
+
+static void
+radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc,
+ struct pipe_h265_enc_picture_desc *pic)
+{
+ enc->enc_pic.picture_type = pic->picture_type;
+ enc->enc_pic.frame_num = pic->frame_num;
+ enc->enc_pic.pic_order_cnt = pic->pic_order_cnt;
+ enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type;
+ enc->enc_pic.not_referenced = pic->not_referenced;
+ enc->enc_pic.is_iframe =
+ (pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_IDR)
+ || (pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_I);
+ enc->enc_pic.crop_left = 0;
+ enc->enc_pic.crop_right =
+ (align(enc->base.width, 16) - enc->base.width) / 2;
+ enc->enc_pic.crop_top = 0;
+ enc->enc_pic.crop_bottom =
+ (align(enc->base.height, 16) - enc->base.height) / 2;
+ enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag;
+ enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc;
+ enc->enc_pic.general_level_idc = pic->seq.general_level_idc;
+ enc->enc_pic.max_poc = pic->seq.intra_period;
+ enc->enc_pic.log2_max_poc = 0;
+ for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++)
+ i = (i >> 1);
+ enc->enc_pic.chroma_format_idc = pic->seq.chroma_format_idc;
+ enc->enc_pic.pic_width_in_luma_samples =
+ pic->seq.pic_width_in_luma_samples;
+ enc->enc_pic.pic_height_in_luma_samples =
+ pic->seq.pic_height_in_luma_samples;
+ enc->enc_pic.log2_diff_max_min_luma_coding_block_size =
+ pic->seq.log2_diff_max_min_luma_coding_block_size;
+ enc->enc_pic.log2_min_transform_block_size_minus2 =
+ pic->seq.log2_min_transform_block_size_minus2;
+ enc->enc_pic.log2_diff_max_min_transform_block_size =
+ pic->seq.log2_diff_max_min_transform_block_size;
+ enc->enc_pic.max_transform_hierarchy_depth_inter =
+ pic->seq.max_transform_hierarchy_depth_inter;
+ enc->enc_pic.max_transform_hierarchy_depth_intra =
+ pic->seq.max_transform_hierarchy_depth_intra;
+ enc->enc_pic.log2_parallel_merge_level_minus2 =
+ pic->pic.log2_parallel_merge_level_minus2;
+ enc->enc_pic.bit_depth_luma_minus8 = pic->seq.bit_depth_luma_minus8;
+ enc->enc_pic.bit_depth_chroma_minus8 = pic->seq.bit_depth_chroma_minus8;
+ enc->enc_pic.nal_unit_type = pic->pic.nal_unit_type;
+ enc->enc_pic.max_num_merge_cand = pic->slice.max_num_merge_cand;
+ enc->enc_pic.sample_adaptive_offset_enabled_flag =
+ pic->seq.sample_adaptive_offset_enabled_flag;
+ enc->enc_pic.pcm_enabled_flag = 0; /*HW not support PCM */
+ enc->enc_pic.sps_temporal_mvp_enabled_flag =
+ pic->seq.sps_temporal_mvp_enabled_flag;
+}
+
+static void
+flush(struct radeon_uvd_encoder *enc)
+{
+ enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL);
+}
+
+static void
+radeon_uvd_enc_flush(struct pipe_video_codec *encoder)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+ flush(enc);
+}
+
+static void
+radeon_uvd_enc_cs_flush(void *ctx, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ // just ignored
+}
+
+static unsigned
+get_cpb_num(struct radeon_uvd_encoder *enc)
+{
+ unsigned w = align(enc->base.width, 16) / 16;
+ unsigned h = align(enc->base.height, 16) / 16;
+ unsigned dpb;
+
+ switch (enc->base.level) {
+ case UVD_HEVC_LEVEL_1:
+ dpb = 36864;
+ break;
+
+ case UVD_HEVC_LEVEL_2:
+ dpb = 122880;
+ break;
+
+ case UVD_HEVC_LEVEL_2_1:
+ dpb = 245760;
+ break;
+
+ case UVD_HEVC_LEVEL_3:
+ dpb = 552960;
+ break;
+
+ case UVD_HEVC_LEVEL_3_1:
+ dpb = 983040;
+ break;
+
+ case UVD_HEVC_LEVEL_4:
+ case UVD_HEVC_LEVEL_4_1:
+ dpb = 2228224;
+ break;
+
+ case UVD_HEVC_LEVEL_5:
+ case UVD_HEVC_LEVEL_5_1:
+ case UVD_HEVC_LEVEL_5_2:
+ dpb = 8912896;
+ break;
+
+ case UVD_HEVC_LEVEL_6:
+ case UVD_HEVC_LEVEL_6_1:
+ case UVD_HEVC_LEVEL_6_2:
+ default:
+ dpb = 35651584;
+ break;
+ }
+
+ return MIN2(dpb / (w * h), 16);
+}
+
+static void
+radeon_uvd_enc_begin_frame(struct pipe_video_codec *encoder,
+ struct pipe_video_buffer *source,
+ struct pipe_picture_desc *picture)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+ struct vl_video_buffer *vid_buf = (struct vl_video_buffer *) source;
+
+ radeon_uvd_enc_get_param(enc,
+ (struct pipe_h265_enc_picture_desc *) picture);
+
+ enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
+ enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
+
+ enc->need_feedback = false;
+
+ if (!enc->stream_handle) {
+ struct rvid_buffer fb;
+ enc->stream_handle = si_vid_alloc_stream_handle();
+ enc->si = CALLOC_STRUCT(rvid_buffer);
+ si_vid_create_buffer(enc->screen, enc->si, 128 * 1024,
+ PIPE_USAGE_STAGING);
+ si_vid_create_buffer(enc->screen, &fb, 4096, PIPE_USAGE_STAGING);
+ enc->fb = &fb;
+ enc->begin(enc, picture);
+ flush(enc);
+ si_vid_destroy_buffer(&fb);
+ }
+}
+
+static void
+radeon_uvd_enc_encode_bitstream(struct pipe_video_codec *encoder,
+ struct pipe_video_buffer *source,
+ struct pipe_resource *destination, void **fb)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+ enc->get_buffer(destination, &enc->bs_handle, NULL);
+ enc->bs_size = destination->width0;
+
+ *fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
+
+ if (!si_vid_create_buffer(enc->screen, enc->fb, 4096, PIPE_USAGE_STAGING)) {
+ RVID_ERR("Can't create feedback buffer.\n");
+ return;
+ }
+
+ enc->need_feedback = true;
+ enc->encode(enc);
+}
+
+static void
+radeon_uvd_enc_end_frame(struct pipe_video_codec *encoder,
+ struct pipe_video_buffer *source,
+ struct pipe_picture_desc *picture)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+ flush(enc);
+}
+
+static void
+radeon_uvd_enc_destroy(struct pipe_video_codec *encoder)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+
+ if (enc->stream_handle) {
+ struct rvid_buffer fb;
+ enc->need_feedback = false;
+ si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
+ enc->fb = &fb;
+ enc->destroy(enc);
+ flush(enc);
+ si_vid_destroy_buffer(&fb);
+ }
+
+ si_vid_destroy_buffer(&enc->cpb);
+ enc->ws->cs_destroy(enc->cs);
+ FREE(enc);
+}
+
+static void
+radeon_uvd_enc_get_feedback(struct pipe_video_codec *encoder,
+ void *feedback, unsigned *size)
+{
+ struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder;
+ struct rvid_buffer *fb = feedback;
+
+ if (NULL != size) {
+ radeon_uvd_enc_feedback_t *fb_data =
+ (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map(fb->res->buf,
+ enc->cs,
+ PIPE_TRANSFER_READ_WRITE);
+
+ if (!fb_data->status)
+ *size = fb_data->bitstream_size;
+ else
+ *size = 0;
+ enc->ws->buffer_unmap(fb->res->buf);
+ }
+
+ si_vid_destroy_buffer(fb);
+ FREE(fb);
+}
+
+struct pipe_video_codec *
+radeon_uvd_create_encoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ,
+ struct radeon_winsys *ws,
+ radeon_uvd_enc_get_buffer get_buffer)
+{
+ struct si_screen *sscreen = (struct si_screen *) context->screen;
+ struct r600_common_context *rctx = (struct r600_common_context *) context;
+ struct radeon_uvd_encoder *enc;
+ struct pipe_video_buffer *tmp_buf, templat = { };
+ struct radeon_surf *tmp_surf;
+ unsigned cpb_size;
+
+ if (!si_radeon_uvd_enc_supported(sscreen)) {
+ RVID_ERR("Unsupported UVD ENC fw version loaded!\n");
+ return NULL;
+ }
+
+ enc = CALLOC_STRUCT(radeon_uvd_encoder);
+
+ if (!enc)
+ return NULL;
+
+ enc->base = *templ;
+ enc->base.context = context;
+ enc->base.destroy = radeon_uvd_enc_destroy;
+ enc->base.begin_frame = radeon_uvd_enc_begin_frame;
+ enc->base.encode_bitstream = radeon_uvd_enc_encode_bitstream;
+ enc->base.end_frame = radeon_uvd_enc_end_frame;
+ enc->base.flush = radeon_uvd_enc_flush;
+ enc->base.get_feedback = radeon_uvd_enc_get_feedback;
+ enc->get_buffer = get_buffer;
+ enc->bits_in_shifter = 0;
+ enc->screen = context->screen;
+ enc->ws = ws;
+ enc->cs =
+ ws->cs_create(rctx->ctx, RING_UVD_ENC, radeon_uvd_enc_cs_flush, enc);
+
+ if (!enc->cs) {
+ RVID_ERR("Can't get command submission context.\n");
+ goto error;
+ }
+
+ struct rvid_buffer si;
+ si_vid_create_buffer(enc->screen, &si, 128 * 1024, PIPE_USAGE_STAGING);
+ enc->si = &si;
+
+ templat.buffer_format = PIPE_FORMAT_NV12;
+ templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
+ templat.width = enc->base.width;
+ templat.height = enc->base.height;
+ templat.interlaced = false;
+
+ if (!(tmp_buf = context->create_video_buffer(context, &templat))) {
+ RVID_ERR("Can't create video buffer.\n");
+ goto error;
+ }
+
+ enc->cpb_num = get_cpb_num(enc);
+
+ if (!enc->cpb_num)
+ goto error;
+
+ get_buffer(((struct vl_video_buffer *) tmp_buf)->resources[0], NULL,
+ &tmp_surf);
+
+ cpb_size = (sscreen->info.chip_class < GFX9) ?
+ align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) *
+ align(tmp_surf->u.legacy.level[0].nblk_y, 32) :
+ align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) *
+ align(tmp_surf->u.gfx9.surf_height, 32);
+
+ cpb_size = cpb_size * 3 / 2;
+ cpb_size = cpb_size * enc->cpb_num;
+ tmp_buf->destroy(tmp_buf);
+
+ if (!si_vid_create_buffer
+ (enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
+ RVID_ERR("Can't create CPB buffer.\n");
+ goto error;
+ }
+
+ radeon_uvd_enc_1_1_init(enc);
+
+ return &enc->base;
+
+ error:
+ if (enc->cs)
+ enc->ws->cs_destroy(enc->cs);
+
+ si_vid_destroy_buffer(&enc->cpb);
+
+ FREE(enc);
+ return NULL;
+}
+
+bool
+si_radeon_uvd_enc_supported(struct si_screen * sscreen)
+{
+ return (sscreen->info.uvd_enc_supported);
+}
--
2.7.4
James Zhu
2018-02-06 20:05:39 UTC
Permalink
Support UVD HEVC encode in amdgpu cs

Signed-off-by: James Zhu <***@amd.com>
---
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 6 ++++++
1 file changed, 6 insertions(+)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 1927a3a..92d5394 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -376,6 +376,7 @@ static bool amdgpu_cs_has_user_fence(struct amdgpu_cs_context *cs)
{
return cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD &&
cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCE &&
+ cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD_ENC &&
cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_DEC &&
cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_ENC;
}
@@ -818,6 +819,10 @@ static bool amdgpu_init_cs_context(struct amdgpu_cs_context *cs,
cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD;
break;

+ case RING_UVD_ENC:
+ cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD_ENC;
+ break;
+
case RING_VCE:
cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCE;
break;
@@ -1533,6 +1538,7 @@ static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4;
break;
case RING_UVD:
+ case RING_UVD_ENC:
while (rcs->current.cdw & 15)
radeon_emit(rcs, 0x80000000); /* type2 nop packet */
break;
--
2.7.4
James Zhu
2018-02-06 20:05:43 UTC
Permalink
Signed-off-by: James Zhu <***@amd.com>
---
src/gallium/drivers/radeon/Makefile.sources | 3 +++
1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources
index b756d72..f8ee860 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -15,6 +15,9 @@ C_SOURCES := \
radeon_vcn_enc_1_2.c \
radeon_vcn_enc.c \
radeon_vcn_enc.h \
+ radeon_uvd_enc_1_1.c \
+ radeon_uvd_enc.c \
+ radeon_uvd_enc.h \
radeon_vce_40_2_2.c \
radeon_vce_50.c \
radeon_vce_52.c \
--
2.7.4
Leo Liu
2018-02-07 14:47:36 UTC
Permalink
Post by James Zhu
---
src/gallium/drivers/radeon/Makefile.sources | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources
index b756d72..f8ee860 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -15,6 +15,9 @@ C_SOURCES := \
radeon_vcn_enc_1_2.c \
radeon_vcn_enc.c \
radeon_vcn_enc.h \
+ radeon_uvd_enc_1_1.c \
+ radeon_uvd_enc.c \
+ radeon_uvd_enc.h \
We need to add this to Meson build as well, like VCN ENC does.


Leo
Post by James Zhu
radeon_vce_40_2_2.c \
radeon_vce_50.c \
radeon_vce_52.c \
Christian König
2018-02-07 14:48:59 UTC
Permalink
Post by Leo Liu
---
  src/gallium/drivers/radeon/Makefile.sources | 3 +++
  1 file changed, 3 insertions(+)
diff --git a/src/gallium/drivers/radeon/Makefile.sources
b/src/gallium/drivers/radeon/Makefile.sources
index b756d72..f8ee860 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -15,6 +15,9 @@ C_SOURCES := \
      radeon_vcn_enc_1_2.c \
      radeon_vcn_enc.c \
      radeon_vcn_enc.h \
+    radeon_uvd_enc_1_1.c \
+    radeon_uvd_enc.c \
+    radeon_uvd_enc.h \
We need to add this to Meson build as well, like VCN ENC does.
And we usually add files to Makefile.sources and Meson when they are
first added/used.

So adding all at once is not the usual approach.

Christian.
Post by Leo Liu
Leo
      radeon_vce_40_2_2.c \
      radeon_vce_50.c \
      radeon_vce_52.c \
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
James Zhu
2018-02-07 16:45:32 UTC
Permalink
Post by Christian König
Post by Leo Liu
---
  src/gallium/drivers/radeon/Makefile.sources | 3 +++
  1 file changed, 3 insertions(+)
diff --git a/src/gallium/drivers/radeon/Makefile.sources
b/src/gallium/drivers/radeon/Makefile.sources
index b756d72..f8ee860 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -15,6 +15,9 @@ C_SOURCES := \
      radeon_vcn_enc_1_2.c \
      radeon_vcn_enc.c \
      radeon_vcn_enc.h \
+    radeon_uvd_enc_1_1.c \
+    radeon_uvd_enc.c \
+    radeon_uvd_enc.h \
We need to add this to Meson build as well, like VCN ENC does.
And we usually add files to Makefile.sources and Meson when they are
first added/used.
So adding all at once is not the usual approach.
Christian.
Since that would require changing other patches, I will keep them all at once this time.
Next time, I will update the build list when a file is first added/used.
thanks! James
Post by Christian König
Post by Leo Liu
Leo
      radeon_vce_40_2_2.c \
      radeon_vce_50.c \
      radeon_vce_52.c \
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
James Zhu
2018-02-07 16:53:06 UTC
Permalink
add uvd hevc enc files in meson.build and Makefile.sources

Signed-off-by: James Zhu <***@amd.com>
---
src/gallium/drivers/radeon/Makefile.sources | 3 +++
src/gallium/drivers/radeon/meson.build | 3 +++
2 files changed, 6 insertions(+)

diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources
index b756d72..f8ee860 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -15,6 +15,9 @@ C_SOURCES := \
radeon_vcn_enc_1_2.c \
radeon_vcn_enc.c \
radeon_vcn_enc.h \
+ radeon_uvd_enc_1_1.c \
+ radeon_uvd_enc.c \
+ radeon_uvd_enc.h \
radeon_vce_40_2_2.c \
radeon_vce_50.c \
radeon_vce_52.c \
diff --git a/src/gallium/drivers/radeon/meson.build b/src/gallium/drivers/radeon/meson.build
index 6857df3..582a5ff 100644
--- a/src/gallium/drivers/radeon/meson.build
+++ b/src/gallium/drivers/radeon/meson.build
@@ -35,6 +35,9 @@ files_libradeon = files(
'radeon_vcn_enc.h',
'radeon_vcn_dec.c',
'radeon_vcn_dec.h',
+ 'radeon_uvd_enc_1_1.c',
+ 'radeon_uvd_enc.c',
+ 'radeon_uvd_enc.h',
'radeon_vce_40_2_2.c',
'radeon_vce_50.c',
'radeon_vce_52.c',
--
2.7.4
James Zhu
2018-02-06 20:05:45 UTC
Permalink
Enable UVD encode for HEVC main profile

Signed-off-by: James Zhu <***@amd.com>
---
src/gallium/drivers/radeonsi/si_get.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 8002362..64f76b4 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -24,6 +24,7 @@
#include "si_pipe.h"
#include "radeon/radeon_video.h"
#include "radeon/radeon_vce.h"
+#include "radeon/radeon_uvd_enc.h"
#include "ac_llvm_util.h"
#include "vl/vl_decoder.h"
#include "vl/vl_video_buffer.h"
@@ -587,7 +588,8 @@ static int si_get_video_param(struct pipe_screen *screen,
(si_vce_is_fw_version_supported(sscreen) ||
sscreen->info.family == CHIP_RAVEN)) ||
(profile == PIPE_VIDEO_PROFILE_HEVC_MAIN &&
- sscreen->info.family == CHIP_RAVEN);
+ (sscreen->info.family == CHIP_RAVEN ||
+ si_radeon_uvd_enc_supported(sscreen)));
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
--
2.7.4
Bas Nieuwenhuizen
2018-02-06 21:48:05 UTC
Permalink
Post by James Zhu
Based on amdgpu hardware query information to check if UVD hevc enc support
---
src/amd/common/ac_gpu_info.c | 10 +++++++++-
src/amd/common/ac_gpu_info.h | 1 +
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 6d9dcb5..2494967 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -98,7 +98,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
{
struct amdgpu_buffer_size_alignments alignment_info = {};
struct amdgpu_heap_info vram, vram_vis, gtt;
- struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, vce = {}, vcn_dec = {}, vcn_enc = {};
+ struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, uvd_enc = {}, vce = {}, vcn_dec = {}, vcn_enc = {};
uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0;
int r, i, j;
drmDevicePtr devinfo;
@@ -166,6 +166,12 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
return false;
}
+ r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_UVD_ENC, 0, &uvd_enc);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd_enc) failed.\n");
+ return false;
+ }
Don't we need to check info->drm_minor here too, as old AMDGPU might
not support the query?
Post by James Zhu
+
if (info->drm_major == 3 && info->drm_minor >= 17) {
r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_DEC, 0, &vcn_dec);
if (r) {
@@ -275,6 +281,8 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
uvd.available_rings ? uvd_version : 0;
info->vce_fw_version =
vce.available_rings ? vce_version : 0;
+ info->uvd_enc_supported =
+ uvd_enc.available_rings ? true : false;
info->has_userptr = true;
info->has_syncobj = has_syncobj(fd);
info->has_syncobj_wait_for_submit = info->has_syncobj && info->drm_minor >= 20;
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index cca3e98..36714ee 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -65,6 +65,7 @@ struct radeon_info {
uint32_t num_compute_rings;
uint32_t uvd_fw_version;
uint32_t vce_fw_version;
+ bool uvd_enc_supported;
uint32_t me_fw_version;
uint32_t me_fw_feature;
uint32_t pfp_fw_version;
--
2.7.4
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
James Zhu
2018-02-07 16:41:57 UTC
Permalink
Based on amdgpu hardware query information to check if UVD hevc enc support

Signed-off-by: James Zhu <***@amd.com>
---
src/amd/common/ac_gpu_info.c | 12 +++++++++++-
src/amd/common/ac_gpu_info.h | 1 +
2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 6d9dcb5..3156df6 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -98,7 +98,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
{
struct amdgpu_buffer_size_alignments alignment_info = {};
struct amdgpu_heap_info vram, vram_vis, gtt;
- struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, vce = {}, vcn_dec = {}, vcn_enc = {};
+ struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, uvd_enc = {}, vce = {}, vcn_dec = {}, vcn_enc = {};
uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0;
int r, i, j;
drmDevicePtr devinfo;
@@ -167,6 +167,14 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
}

if (info->drm_major == 3 && info->drm_minor >= 17) {
+ r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_UVD_ENC, 0, &uvd_enc);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd_enc) failed.\n");
+ return false;
+ }
+ }
+
+ if (info->drm_major == 3 && info->drm_minor >= 17) {
r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_DEC, 0, &vcn_dec);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vcn_dec) failed.\n");
@@ -275,6 +283,8 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
uvd.available_rings ? uvd_version : 0;
info->vce_fw_version =
vce.available_rings ? vce_version : 0;
+ info->uvd_enc_supported =
+ uvd_enc.available_rings ? true : false;
info->has_userptr = true;
info->has_syncobj = has_syncobj(fd);
info->has_syncobj_wait_for_submit = info->has_syncobj && info->drm_minor >= 20;
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index cca3e98..36714ee 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -65,6 +65,7 @@ struct radeon_info {
uint32_t num_compute_rings;
uint32_t uvd_fw_version;
uint32_t vce_fw_version;
+ bool uvd_enc_supported;
uint32_t me_fw_version;
uint32_t me_fw_feature;
uint32_t pfp_fw_version;
--
2.7.4
Marek Olšák
2018-02-08 14:54:05 UTC
Permalink
Reviewed-by: Marek Olšák <***@amd.com>

Marek
Post by James Zhu
Based on amdgpu hardware query information to check if UVD hevc enc support
---
src/amd/common/ac_gpu_info.c | 12 +++++++++++-
src/amd/common/ac_gpu_info.h | 1 +
2 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 6d9dcb5..3156df6 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -98,7 +98,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
{
struct amdgpu_buffer_size_alignments alignment_info = {};
struct amdgpu_heap_info vram, vram_vis, gtt;
- struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, vce = {}, vcn_dec = {}, vcn_enc = {};
+ struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, uvd_enc = {}, vce = {}, vcn_dec = {}, vcn_enc = {};
uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0;
int r, i, j;
drmDevicePtr devinfo;
@@ -167,6 +167,14 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
}
if (info->drm_major == 3 && info->drm_minor >= 17) {
+ r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_UVD_ENC, 0, &uvd_enc);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd_enc) failed.\n");
+ return false;
+ }
+ }
+
+ if (info->drm_major == 3 && info->drm_minor >= 17) {
r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_DEC, 0, &vcn_dec);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vcn_dec) failed.\n");
@@ -275,6 +283,8 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
uvd.available_rings ? uvd_version : 0;
info->vce_fw_version =
vce.available_rings ? vce_version : 0;
+ info->uvd_enc_supported =
+ uvd_enc.available_rings ? true : false;
info->has_userptr = true;
info->has_syncobj = has_syncobj(fd);
info->has_syncobj_wait_for_submit = info->has_syncobj && info->drm_minor >= 20;
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index cca3e98..36714ee 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -65,6 +65,7 @@ struct radeon_info {
uint32_t num_compute_rings;
uint32_t uvd_fw_version;
uint32_t vce_fw_version;
+ bool uvd_enc_supported;
uint32_t me_fw_version;
uint32_t me_fw_feature;
uint32_t pfp_fw_version;
--
2.7.4
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Mark Thompson
2018-02-08 22:28:30 UTC
Permalink
The whole series are the updated version. Changes are made mainly based
on the comments from the previous code review from Alex, Leo and Boyuan
amd/common:add uvd hevc enc support check in hw query
winsys/amdgpu:add uvd hevc enc support in amdgpu cs
radeon/uvd:add uvd hevc enc hw interface header
radeon/uvd:add uvd hevc enc hw ib implementation
radeon/uvd:add uvd hevc enc functions
radeon/uvd:add uvd hevc enc files in Makefile list
radeonsi:create uvd hevc enc entry
radeonsi: enable uvd encode for HEVC main
src/amd/common/ac_gpu_info.c | 10 +-
src/amd/common/ac_gpu_info.h | 1 +
src/gallium/drivers/radeon/Makefile.sources | 3 +
src/gallium/drivers/radeon/radeon_uvd_enc.c | 370 ++++++++
src/gallium/drivers/radeon/radeon_uvd_enc.h | 471 ++++++++++
src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 1115 +++++++++++++++++++++++
src/gallium/drivers/radeonsi/si_get.c | 4 +-
src/gallium/drivers/radeonsi/si_uvd.c | 15 +-
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 6 +
9 files changed, 1990 insertions(+), 5 deletions(-)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.h
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
Can you explain what the requirements are for using this (hardware, firmware, software)?

From what I can find it should be on Polaris and Vega, but I haven't succeeded in getting it working on Polaris.

Thanks,

- Mark
Alex Deucher
2018-02-08 22:37:24 UTC
Permalink
Post by Mark Thompson
The whole series are the updated version. Changes are made mainly based
on the comments from the previous code review from Alex, Leo and Boyuan
amd/common:add uvd hevc enc support check in hw query
winsys/amdgpu:add uvd hevc enc support in amdgpu cs
radeon/uvd:add uvd hevc enc hw interface header
radeon/uvd:add uvd hevc enc hw ib implementation
radeon/uvd:add uvd hevc enc functions
radeon/uvd:add uvd hevc enc files in Makefile list
radeonsi:create uvd hevc enc entry
radeonsi: enable uvd encode for HEVC main
src/amd/common/ac_gpu_info.c | 10 +-
src/amd/common/ac_gpu_info.h | 1 +
src/gallium/drivers/radeon/Makefile.sources | 3 +
src/gallium/drivers/radeon/radeon_uvd_enc.c | 370 ++++++++
src/gallium/drivers/radeon/radeon_uvd_enc.h | 471 ++++++++++
src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 1115 +++++++++++++++++++++++
src/gallium/drivers/radeonsi/si_get.c | 4 +-
src/gallium/drivers/radeonsi/si_uvd.c | 15 +-
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 6 +
9 files changed, 1990 insertions(+), 5 deletions(-)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.h
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
Can you explain what the requirements are for using this (hardware, firmware, software)?
From what I can find it should be on Polaris and Vega, but I haven't succeeded in getting it working on Polaris.
Yes, polaris and vega10. For polaris, you'll need a kernel that
enables the uvd enc rings. Patches went upstream last year, 4.14 I
think? 4.15 is a good bet. As for the polaris firmware, you'll need
version FW_1_130_16 or newer:
https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/commit/?id=2a713be25a44bd6cec90d8affc54b246a2ca9c7b

Alex
Post by Mark Thompson
Thanks,
- Mark
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Mark Thompson
2018-02-08 23:05:23 UTC
Permalink
Post by Alex Deucher
Post by Mark Thompson
The whole series are the updated version. Changes are made mainly based
on the comments from the previous code review from Alex, Leo and Boyuan
amd/common:add uvd hevc enc support check in hw query
winsys/amdgpu:add uvd hevc enc support in amdgpu cs
radeon/uvd:add uvd hevc enc hw interface header
radeon/uvd:add uvd hevc enc hw ib implementation
radeon/uvd:add uvd hevc enc functions
radeon/uvd:add uvd hevc enc files in Makefile list
radeonsi:create uvd hevc enc entry
radeonsi: enable uvd encode for HEVC main
src/amd/common/ac_gpu_info.c | 10 +-
src/amd/common/ac_gpu_info.h | 1 +
src/gallium/drivers/radeon/Makefile.sources | 3 +
src/gallium/drivers/radeon/radeon_uvd_enc.c | 370 ++++++++
src/gallium/drivers/radeon/radeon_uvd_enc.h | 471 ++++++++++
src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 1115 +++++++++++++++++++++++
src/gallium/drivers/radeonsi/si_get.c | 4 +-
src/gallium/drivers/radeonsi/si_uvd.c | 15 +-
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 6 +
9 files changed, 1990 insertions(+), 5 deletions(-)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.h
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
Can you explain what the requirements are for using this (hardware, firmware, software)?
From what I can find it should be on Polaris and Vega, but I haven't succeeded in getting it working on Polaris.
Yes, polaris and vega10. For polaris, you'll need a kernel that
enables the uvd enc rings. Patches went upstream last year, 4.14 I
think? 4.15 is a good bet.
Ah, that's where I'm going wrong - despite the dates it's not actually in 4.14, so I need 4.15.
Post by Alex Deucher
As for the polaris firmware, you'll need
https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/commit/?id=2a713be25a44bd6cec90d8affc54b246a2ca9c7b
Thanks,

- Mark
Mark Thompson
2018-02-10 16:06:09 UTC
Permalink
Post by Mark Thompson
Post by Alex Deucher
Post by Mark Thompson
The whole series are the updated version. Changes are made mainly based
on the comments from the previous code review from Alex, Leo and Boyuan
amd/common:add uvd hevc enc support check in hw query
winsys/amdgpu:add uvd hevc enc support in amdgpu cs
radeon/uvd:add uvd hevc enc hw interface header
radeon/uvd:add uvd hevc enc hw ib implementation
radeon/uvd:add uvd hevc enc functions
radeon/uvd:add uvd hevc enc files in Makefile list
radeonsi:create uvd hevc enc entry
radeonsi: enable uvd encode for HEVC main
src/amd/common/ac_gpu_info.c | 10 +-
src/amd/common/ac_gpu_info.h | 1 +
src/gallium/drivers/radeon/Makefile.sources | 3 +
src/gallium/drivers/radeon/radeon_uvd_enc.c | 370 ++++++++
src/gallium/drivers/radeon/radeon_uvd_enc.h | 471 ++++++++++
src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 1115 +++++++++++++++++++++++
src/gallium/drivers/radeonsi/si_get.c | 4 +-
src/gallium/drivers/radeonsi/si_uvd.c | 15 +-
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 6 +
9 files changed, 1990 insertions(+), 5 deletions(-)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.h
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
Can you explain what the requirements are for using this (hardware, firmware, software)?
From what I can find it should be on Polaris and Vega, but I haven't succeeded in getting it working on Polaris.
Yes, polaris and vega10. For polaris, you'll need a kernel that
enables the uvd enc rings. Patches went upstream last year, 4.14 I
think? 4.15 is a good bet.
Ah, that's where I'm going wrong - despite the dates it's not actually in 4.14, so I need 4.15.
Post by Alex Deucher
As for the polaris firmware, you'll need
https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/commit/?id=2a713be25a44bd6cec90d8affc54b246a2ca9c7b
Right, I have the encoder working with 4.15.2 on an RX 460 / Polaris 11 with firmware 1.130_16.

There seems to be some issue with using both encode and playback at the same time? It hangs the amdgpu driver and all userspace processes interacting with it become stuck and unkillable, requiring a reboot to recover. It's completely repeatable, and only needs a few seconds to die when both mpv (playback) and ffmpeg (transcode) are running at the same time.

There is no message at all from the stuck driver, but I end up with hung tasks like:

[ 1209.317130] INFO: task kworker/u24:0:5 blocked for more than 120 seconds.
[ 1209.317132] Not tainted 4.15.2 #2
[ 1209.317133] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1209.317133] kworker/u24:0 D 0 5 2 0x80000000
[ 1209.317137] Workqueue: events_unbound commit_work
[ 1209.317138] Call Trace:
[ 1209.317142] ? __schedule+0x26b/0x840
[ 1209.317144] ? __update_load_avg_se.isra.37+0x1b6/0x1c0
[ 1209.317145] schedule+0x28/0x80
[ 1209.317146] schedule_timeout+0x1de/0x360
[ 1209.317177] ? dce110_timing_generator_get_position+0x51/0x60 [amdgpu]
[ 1209.317199] ? dce110_timing_generator_get_crtc_scanoutpos+0x6b/0xa0 [amdgpu]
[ 1209.317201] dma_fence_default_wait+0x1f6/0x280
[ 1209.317203] ? dma_fence_release+0x90/0x90
[ 1209.317204] dma_fence_wait_timeout+0x33/0xe0
[ 1209.317205] reservation_object_wait_timeout_rcu+0x198/0x340
[ 1209.317227] amdgpu_dm_do_flip+0x112/0x350 [amdgpu]
[ 1209.317248] amdgpu_dm_atomic_commit_tail+0x8a4/0x9a0 [amdgpu]
[ 1209.317250] ? pick_next_task_fair+0x14f/0x5f0
[ 1209.317251] commit_tail+0x3a/0x70
[ 1209.317252] process_one_work+0x17c/0x370
[ 1209.317253] worker_thread+0x2e/0x370
[ 1209.317255] ? process_one_work+0x370/0x370
[ 1209.317256] kthread+0x111/0x130
[ 1209.317257] ? kthread_create_worker_on_cpu+0x70/0x70
[ 1209.317258] ret_from_fork+0x1f/0x30
[ 1330.152054] INFO: task kworker/u24:0:5 blocked for more than 120 seconds.
[ 1330.152056] Not tainted 4.15.2 #2
[ 1330.152056] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1330.152057] kworker/u24:0 D 0 5 2 0x80000000
[ 1330.152059] Workqueue: events_unbound commit_work
[ 1330.152060] Call Trace:
[ 1330.152063] ? __schedule+0x26b/0x840
[ 1330.152065] ? __update_load_avg_se.isra.37+0x1b6/0x1c0
[ 1330.152066] schedule+0x28/0x80
[ 1330.152067] schedule_timeout+0x1de/0x360
[ 1330.152108] ? dce110_timing_generator_get_position+0x51/0x60 [amdgpu]
[ 1330.152130] ? dce110_timing_generator_get_crtc_scanoutpos+0x6b/0xa0 [amdgpu]
[ 1330.152132] dma_fence_default_wait+0x1f6/0x280
[ 1330.152133] ? dma_fence_release+0x90/0x90
[ 1330.152134] dma_fence_wait_timeout+0x33/0xe0
[ 1330.152136] reservation_object_wait_timeout_rcu+0x198/0x340
[ 1330.152158] amdgpu_dm_do_flip+0x112/0x350 [amdgpu]
[ 1330.152179] amdgpu_dm_atomic_commit_tail+0x8a4/0x9a0 [amdgpu]
[ 1330.152180] ? pick_next_task_fair+0x14f/0x5f0
[ 1330.152181] commit_tail+0x3a/0x70
[ 1330.152183] process_one_work+0x17c/0x370
[ 1330.152184] worker_thread+0x2e/0x370
[ 1330.152185] ? process_one_work+0x370/0x370
[ 1330.152186] kthread+0x111/0x130
[ 1330.152187] ? kthread_create_worker_on_cpu+0x70/0x70
[ 1330.152188] ret_from_fork+0x1f/0x30
[ 1330.152196] INFO: task mpv/vo:3113 blocked for more than 120 seconds.
[ 1330.152197] Not tainted 4.15.2 #2
[ 1330.152197] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1330.152198] mpv/vo D 0 3113 2983 0x80000006
[ 1330.152199] Call Trace:
[ 1330.152200] ? __schedule+0x26b/0x840
[ 1330.152201] schedule+0x28/0x80
[ 1330.152202] schedule_preempt_disabled+0xa/0x10
[ 1330.152204] __mutex_lock.isra.1+0x18e/0x4c0
[ 1330.152205] ? drm_release+0x36/0x3b0
[ 1330.152206] drm_release+0x36/0x3b0
[ 1330.152208] __fput+0xcd/0x1d0
[ 1330.152210] task_work_run+0x7b/0xa0
[ 1330.152211] do_exit+0x2d0/0xb10
[ 1330.152212] ? __check_object_size+0xaf/0x1b0
[ 1330.152214] ? _copy_to_user+0x22/0x30
[ 1330.152215] ? drm_ioctl+0x2ee/0x380
[ 1330.152216] do_group_exit+0x3a/0xa0
[ 1330.152217] get_signal+0x260/0x560
[ 1330.152219] do_signal+0x36/0x690
[ 1330.152231] ? amdgpu_drm_ioctl+0x6c/0x80 [amdgpu]
[ 1330.152233] ? do_vfs_ioctl+0xa1/0x610
[ 1330.152234] exit_to_usermode_loop+0x58/0x90
[ 1330.152235] do_syscall_64+0xe8/0xf0
[ 1330.152236] entry_SYSCALL_64_after_hwframe+0x21/0x86
[ 1330.152238] RIP: 0033:0x7f95a1036e6b
[ 1330.152238] RSP: 002b:00007f959b0fa0b0 EFLAGS: 00000293 ORIG_RAX: 0000000000000007
[ 1330.152239] RAX: fffffffffffffdfc RBX: 00007f959b0fa0f0 RCX: 00007f95a1036e6b
[ 1330.152240] RDX: ffffffffffffffff RSI: 0000000000000001 RDI: 00007f959b0fa0f0
[ 1330.152240] RBP: 0000000000000001 R08: 0000000000000000 R09: 00007f959b0fa400
[ 1330.152241] R10: 0000000000000106 R11: 0000000000000293 R12: 00007f95940376d8
[ 1330.152241] R13: 00007f95943e95a8 R14: 00000000ffffffff R15: 00007f959b0fa0f0
[ 1330.152243] INFO: task ffmpeg_g:3143 blocked for more than 120 seconds.
[ 1330.152243] Not tainted 4.15.2 #2
[ 1330.152244] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1330.152244] ffmpeg_g D 0 3143 2402 0x80000006
[ 1330.152245] Call Trace:
[ 1330.152246] ? __schedule+0x26b/0x840
[ 1330.152247] schedule+0x28/0x80
[ 1330.152267] amd_sched_entity_push_job+0xa3/0xf0 [amdgpu]
[ 1330.152269] ? finish_wait+0x80/0x80
[ 1330.152288] amdgpu_job_submit+0x9c/0xc0 [amdgpu]
[ 1330.152303] amdgpu_vm_bo_update_mapping+0x383/0x3f0 [amdgpu]
[ 1330.152318] ? amdgpu_vm_free_mapping.isra.20+0x20/0x20 [amdgpu]
[ 1330.152331] amdgpu_vm_clear_freed+0xbb/0x190 [amdgpu]
[ 1330.152345] amdgpu_gem_object_close+0x19c/0x210 [amdgpu]
[ 1330.152348] ? drm_gem_object_release_handle+0x2c/0x90
[ 1330.152349] drm_gem_object_release_handle+0x2c/0x90
[ 1330.152350] ? drm_gem_object_handle_put_unlocked+0xb0/0xb0
[ 1330.152352] idr_for_each+0x48/0xe0
[ 1330.152353] drm_gem_release+0x1c/0x30
[ 1330.152354] drm_release+0x342/0x3b0
[ 1330.152356] __fput+0xcd/0x1d0
[ 1330.152357] task_work_run+0x7b/0xa0
[ 1330.152358] do_exit+0x2d0/0xb10
[ 1330.152359] do_group_exit+0x3a/0xa0
[ 1330.152360] get_signal+0x260/0x560
[ 1330.152361] do_signal+0x36/0x690
[ 1330.152363] ? __vma_rb_erase+0x1f6/0x270
[ 1330.152364] ? SyS_futex+0x12d/0x180
[ 1330.152365] exit_to_usermode_loop+0x58/0x90
[ 1330.152366] do_syscall_64+0xe8/0xf0
[ 1330.152367] entry_SYSCALL_64_after_hwframe+0x21/0x86
[ 1330.152368] RIP: 0033:0x7f60bb3df7dd
[ 1330.152368] RSP: 002b:00007f60927fbdd0 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
[ 1330.152369] RAX: fffffffffffffe00 RBX: 0000557f96700178 RCX: 00007f60bb3df7dd
[ 1330.152370] RDX: 0000000000000000 RSI: 0000000000000080 RDI: 0000557f967001a4
[ 1330.152370] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000557f96879778
[ 1330.152371] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000033
[ 1330.152371] R13: 0000557f96700208 R14: 0000000000000000 R15: 0000557f967001a4


Is that known? Is there anything else I can do with it?

- Mark
Zhu, James
2018-02-12 19:03:41 UTC
Permalink
On 2018-02-10 11:06 AM, Mark Thompson wrote:

On 08/02/18 23:05, Mark Thompson wrote:


On 08/02/18 22:37, Alex Deucher wrote:


On Thu, Feb 8, 2018 at 5:28 PM, Mark Thompson <***@jkqxz.net> wrote:


On 06/02/18 20:05, James Zhu wrote:


The whole series are the updated version. Changes are made mainly based
on the comments from the previous code review from Alex, Leo and Boyuan

James Zhu (8):
amd/common:add uvd hevc enc support check in hw query
winsys/amdgpu:add uvd hevc enc support in amdgpu cs
radeon/uvd:add uvd hevc enc hw interface header
radeon/uvd:add uvd hevc enc hw ib implementation
radeon/uvd:add uvd hevc enc functions
radeon/uvd:add uvd hevc enc files in Makefile list
radeonsi:create uvd hevc enc entry
radeonsi: enable uvd encode for HEVC main

src/amd/common/ac_gpu_info.c | 10 +-
src/amd/common/ac_gpu_info.h | 1 +
src/gallium/drivers/radeon/Makefile.sources | 3 +
src/gallium/drivers/radeon/radeon_uvd_enc.c | 370 ++++++++
src/gallium/drivers/radeon/radeon_uvd_enc.h | 471 ++++++++++
src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 1115 +++++++++++++++++++++++
src/gallium/drivers/radeonsi/si_get.c | 4 +-
src/gallium/drivers/radeonsi/si_uvd.c | 15 +-
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 6 +
9 files changed, 1990 insertions(+), 5 deletions(-)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.h
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c



Can you explain what the requirements are for using this (hardware, firmware, software)?

From what I can find it should be on Polaris and Vega, but I haven't succeeded in getting it working on Polaris.


Yes, polaris and vega10. For polaris, you'll need a kernel that
enables the uvd enc rings. Patches went upstream last year, 4.14 I
think? 4.15 is a good bet.


Ah, that's where I'm going wrong - despite the dates it's not actually in 4.14, so I need 4.15.



As for the polaris firmware, you'll need
version FW_1_130_16 or newer:
https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/commit/?id=2a713be25a44bd6cec90d8affc54b246a2ca9c7b


Right, I have the encoder working with 4.15.2 on an RX 460 / Polaris 11 with firmware 1.130_16.

There seems to be some issue with using both encode and playback at the same time? It hangs the amdgpu driver and all userspace processes interacting with it become stuck and unkillable, requiring a reboot to recover. It's completely repeatable, and only needs a few seconds to die when both mpv (playback) and ffmpeg (transcode) are running at the same time.

There is no message at all from the stuck driver, but I end up with hung tasks like:

[ 1209.317130] INFO: task kworker/u24:0:5 blocked for more than 120 seconds.
[ 1209.317132] Not tainted 4.15.2 #2
[ 1209.317133] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1209.317133] kworker/u24:0 D 0 5 2 0x80000000
[ 1209.317137] Workqueue: events_unbound commit_work
[ 1209.317138] Call Trace:
[ 1209.317142] ? __schedule+0x26b/0x840
[ 1209.317144] ? __update_load_avg_se.isra.37+0x1b6/0x1c0
[ 1209.317145] schedule+0x28/0x80
[ 1209.317146] schedule_timeout+0x1de/0x360
[ 1209.317177] ? dce110_timing_generator_get_position+0x51/0x60 [amdgpu]
[ 1209.317199] ? dce110_timing_generator_get_crtc_scanoutpos+0x6b/0xa0 [amdgpu]
[ 1209.317201] dma_fence_default_wait+0x1f6/0x280
[ 1209.317203] ? dma_fence_release+0x90/0x90
[ 1209.317204] dma_fence_wait_timeout+0x33/0xe0
[ 1209.317205] reservation_object_wait_timeout_rcu+0x198/0x340
[ 1209.317227] amdgpu_dm_do_flip+0x112/0x350 [amdgpu]
[ 1209.317248] amdgpu_dm_atomic_commit_tail+0x8a4/0x9a0 [amdgpu]
[ 1209.317250] ? pick_next_task_fair+0x14f/0x5f0
[ 1209.317251] commit_tail+0x3a/0x70
[ 1209.317252] process_one_work+0x17c/0x370
[ 1209.317253] worker_thread+0x2e/0x370
[ 1209.317255] ? process_one_work+0x370/0x370
[ 1209.317256] kthread+0x111/0x130
[ 1209.317257] ? kthread_create_worker_on_cpu+0x70/0x70
[ 1209.317258] ret_from_fork+0x1f/0x30
[ 1330.152054] INFO: task kworker/u24:0:5 blocked for more than 120 seconds.
[ 1330.152056] Not tainted 4.15.2 #2
[ 1330.152056] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1330.152057] kworker/u24:0 D 0 5 2 0x80000000
[ 1330.152059] Workqueue: events_unbound commit_work
[ 1330.152060] Call Trace:
[ 1330.152063] ? __schedule+0x26b/0x840
[ 1330.152065] ? __update_load_avg_se.isra.37+0x1b6/0x1c0
[ 1330.152066] schedule+0x28/0x80
[ 1330.152067] schedule_timeout+0x1de/0x360
[ 1330.152108] ? dce110_timing_generator_get_position+0x51/0x60 [amdgpu]
[ 1330.152130] ? dce110_timing_generator_get_crtc_scanoutpos+0x6b/0xa0 [amdgpu]
[ 1330.152132] dma_fence_default_wait+0x1f6/0x280
[ 1330.152133] ? dma_fence_release+0x90/0x90
[ 1330.152134] dma_fence_wait_timeout+0x33/0xe0
[ 1330.152136] reservation_object_wait_timeout_rcu+0x198/0x340
[ 1330.152158] amdgpu_dm_do_flip+0x112/0x350 [amdgpu]
[ 1330.152179] amdgpu_dm_atomic_commit_tail+0x8a4/0x9a0 [amdgpu]
[ 1330.152180] ? pick_next_task_fair+0x14f/0x5f0
[ 1330.152181] commit_tail+0x3a/0x70
[ 1330.152183] process_one_work+0x17c/0x370
[ 1330.152184] worker_thread+0x2e/0x370
[ 1330.152185] ? process_one_work+0x370/0x370
[ 1330.152186] kthread+0x111/0x130
[ 1330.152187] ? kthread_create_worker_on_cpu+0x70/0x70
[ 1330.152188] ret_from_fork+0x1f/0x30
[ 1330.152196] INFO: task mpv/vo:3113 blocked for more than 120 seconds.
[ 1330.152197] Not tainted 4.15.2 #2
[ 1330.152197] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1330.152198] mpv/vo D 0 3113 2983 0x80000006
[ 1330.152199] Call Trace:
[ 1330.152200] ? __schedule+0x26b/0x840
[ 1330.152201] schedule+0x28/0x80
[ 1330.152202] schedule_preempt_disabled+0xa/0x10
[ 1330.152204] __mutex_lock.isra.1+0x18e/0x4c0
[ 1330.152205] ? drm_release+0x36/0x3b0
[ 1330.152206] drm_release+0x36/0x3b0
[ 1330.152208] __fput+0xcd/0x1d0
[ 1330.152210] task_work_run+0x7b/0xa0
[ 1330.152211] do_exit+0x2d0/0xb10
[ 1330.152212] ? __check_object_size+0xaf/0x1b0
[ 1330.152214] ? _copy_to_user+0x22/0x30
[ 1330.152215] ? drm_ioctl+0x2ee/0x380
[ 1330.152216] do_group_exit+0x3a/0xa0
[ 1330.152217] get_signal+0x260/0x560
[ 1330.152219] do_signal+0x36/0x690
[ 1330.152231] ? amdgpu_drm_ioctl+0x6c/0x80 [amdgpu]
[ 1330.152233] ? do_vfs_ioctl+0xa1/0x610
[ 1330.152234] exit_to_usermode_loop+0x58/0x90
[ 1330.152235] do_syscall_64+0xe8/0xf0
[ 1330.152236] entry_SYSCALL_64_after_hwframe+0x21/0x86
[ 1330.152238] RIP: 0033:0x7f95a1036e6b
[ 1330.152238] RSP: 002b:00007f959b0fa0b0 EFLAGS: 00000293 ORIG_RAX: 0000000000000007
[ 1330.152239] RAX: fffffffffffffdfc RBX: 00007f959b0fa0f0 RCX: 00007f95a1036e6b
[ 1330.152240] RDX: ffffffffffffffff RSI: 0000000000000001 RDI: 00007f959b0fa0f0
[ 1330.152240] RBP: 0000000000000001 R08: 0000000000000000 R09: 00007f959b0fa400
[ 1330.152241] R10: 0000000000000106 R11: 0000000000000293 R12: 00007f95940376d8
[ 1330.152241] R13: 00007f95943e95a8 R14: 00000000ffffffff R15: 00007f959b0fa0f0
[ 1330.152243] INFO: task ffmpeg_g:3143 blocked for more than 120 seconds.
[ 1330.152243] Not tainted 4.15.2 #2
[ 1330.152244] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1330.152244] ffmpeg_g D 0 3143 2402 0x80000006
[ 1330.152245] Call Trace:
[ 1330.152246] ? __schedule+0x26b/0x840
[ 1330.152247] schedule+0x28/0x80
[ 1330.152267] amd_sched_entity_push_job+0xa3/0xf0 [amdgpu]
[ 1330.152269] ? finish_wait+0x80/0x80
[ 1330.152288] amdgpu_job_submit+0x9c/0xc0 [amdgpu]
[ 1330.152303] amdgpu_vm_bo_update_mapping+0x383/0x3f0 [amdgpu]
[ 1330.152318] ? amdgpu_vm_free_mapping.isra.20+0x20/0x20 [amdgpu]
[ 1330.152331] amdgpu_vm_clear_freed+0xbb/0x190 [amdgpu]
[ 1330.152345] amdgpu_gem_object_close+0x19c/0x210 [amdgpu]
[ 1330.152348] ? drm_gem_object_release_handle+0x2c/0x90
[ 1330.152349] drm_gem_object_release_handle+0x2c/0x90
[ 1330.152350] ? drm_gem_object_handle_put_unlocked+0xb0/0xb0
[ 1330.152352] idr_for_each+0x48/0xe0
[ 1330.152353] drm_gem_release+0x1c/0x30
[ 1330.152354] drm_release+0x342/0x3b0
[ 1330.152356] __fput+0xcd/0x1d0
[ 1330.152357] task_work_run+0x7b/0xa0
[ 1330.152358] do_exit+0x2d0/0xb10
[ 1330.152359] do_group_exit+0x3a/0xa0
[ 1330.152360] get_signal+0x260/0x560
[ 1330.152361] do_signal+0x36/0x690
[ 1330.152363] ? __vma_rb_erase+0x1f6/0x270
[ 1330.152364] ? SyS_futex+0x12d/0x180
[ 1330.152365] exit_to_usermode_loop+0x58/0x90
[ 1330.152366] do_syscall_64+0xe8/0xf0
[ 1330.152367] entry_SYSCALL_64_after_hwframe+0x21/0x86
[ 1330.152368] RIP: 0033:0x7f60bb3df7dd
[ 1330.152368] RSP: 002b:00007f60927fbdd0 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
[ 1330.152369] RAX: fffffffffffffe00 RBX: 0000557f96700178 RCX: 00007f60bb3df7dd
[ 1330.152370] RDX: 0000000000000000 RSI: 0000000000000080 RDI: 0000557f967001a4
[ 1330.152370] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000557f96879778
[ 1330.152371] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000033
[ 1330.152371] R13: 0000557f96700208 R14: 0000000000000000 R15: 0000557f967001a4


Is that known? Is there anything else I can do with it?

- Mark


On Baffin (Polaris 12), I ran the commands below simultaneously and no problems were reported. The transcode result is also good.
$>w=1440;h=900;ffmpeg -s:v 1920x1088 -pix_fmt nv12 -r 1 -i BFBC2_576f_1920x1088.yuv -vf scale=${w}x${h} -c:v rawvideo -pix_fmt nv12 -frames 576 BFBC2_576f_${w}x${h}.yuv

$>mpv 1.h265


Luís reported "Deadlocks with multiple applications on AMD RX 460 and RX 550" on the mailing list. It may only happen with the RX 460.


James
James Zhu
2018-02-13 16:38:01 UTC
Permalink
Hi Mark,

Do you still encounter the hang issue?

If yes, could you share your playback and transcode streams and command lines with me,
so that I can try to reproduce it on my side?

Thanks & Best Regards!

James Zhu
Post by Mark Thompson
Post by Mark Thompson
Post by Alex Deucher
Post by Mark Thompson
The whole series are the updated version. Changes are made mainly based
on the comments from the previous code review by Alex, Leo and Boyuan
amd/common:add uvd hevc enc support check in hw query
winsys/amdgpu:add uvd hevc enc support in amdgpu cs
radeon/uvd:add uvd hevc enc hw interface header
radeon/uvd:add uvd hevc enc hw ib implementation
radeon/uvd:add uvd hevc enc functions
radeon/uvd:add uvd hevc enc files in Makefile list
radeonsi:create uvd hevc enc entry
radeonsi: enable uvd encode for HEVC main
src/amd/common/ac_gpu_info.c | 10 +-
src/amd/common/ac_gpu_info.h | 1 +
src/gallium/drivers/radeon/Makefile.sources | 3 +
src/gallium/drivers/radeon/radeon_uvd_enc.c | 370 ++++++++
src/gallium/drivers/radeon/radeon_uvd_enc.h | 471 ++++++++++
src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 1115 +++++++++++++++++++++++
src/gallium/drivers/radeonsi/si_get.c | 4 +-
src/gallium/drivers/radeonsi/si_uvd.c | 15 +-
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 6 +
9 files changed, 1990 insertions(+), 5 deletions(-)
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.c
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc.h
create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
Can you explain what the requirements are for using this (hardware, firmware, software)?
From what I can find it should be on Polaris and Vega, but I haven't succeeded in getting it working on Polaris.
Yes, polaris and vega10. For polaris, you'll need a kernel that
enables the uvd enc rings. Patches went upstream last year, 4.14 I
think? 4.15 is a good bet.
Ah, that's where I'm going wrong - despite the dates it's not actually in 4.14, so I need 4.15.
Post by Alex Deucher
As for the polaris firmware, you'll need
https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/commit/?id=2a713be25a44bd6cec90d8affc54b246a2ca9c7b
Right, I have the encoder working with 4.15.2 on an RX 460 / Polaris 11 with firmware 1.130_16.
There seems to be some issue with using both encode and playback at the same time? It hangs the amdgpu driver and all userspace processes interacting with it become stuck and unkillable, requiring a reboot to recover. It's completely repeatable, and only needs a few seconds to die when both mpv (playback) and ffmpeg (transcode) are running at the same time.
[ 1209.317130] INFO: task kworker/u24:0:5 blocked for more than 120 seconds.
[ 1209.317132] Not tainted 4.15.2 #2
[ 1209.317133] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1209.317133] kworker/u24:0 D 0 5 2 0x80000000
[ 1209.317137] Workqueue: events_unbound commit_work
[ 1209.317142] ? __schedule+0x26b/0x840
[ 1209.317144] ? __update_load_avg_se.isra.37+0x1b6/0x1c0
[ 1209.317145] schedule+0x28/0x80
[ 1209.317146] schedule_timeout+0x1de/0x360
[ 1209.317177] ? dce110_timing_generator_get_position+0x51/0x60 [amdgpu]
[ 1209.317199] ? dce110_timing_generator_get_crtc_scanoutpos+0x6b/0xa0 [amdgpu]
[ 1209.317201] dma_fence_default_wait+0x1f6/0x280
[ 1209.317203] ? dma_fence_release+0x90/0x90
[ 1209.317204] dma_fence_wait_timeout+0x33/0xe0
[ 1209.317205] reservation_object_wait_timeout_rcu+0x198/0x340
[ 1209.317227] amdgpu_dm_do_flip+0x112/0x350 [amdgpu]
[ 1209.317248] amdgpu_dm_atomic_commit_tail+0x8a4/0x9a0 [amdgpu]
[ 1209.317250] ? pick_next_task_fair+0x14f/0x5f0
[ 1209.317251] commit_tail+0x3a/0x70
[ 1209.317252] process_one_work+0x17c/0x370
[ 1209.317253] worker_thread+0x2e/0x370
[ 1209.317255] ? process_one_work+0x370/0x370
[ 1209.317256] kthread+0x111/0x130
[ 1209.317257] ? kthread_create_worker_on_cpu+0x70/0x70
[ 1209.317258] ret_from_fork+0x1f/0x30
[ 1330.152054] INFO: task kworker/u24:0:5 blocked for more than 120 seconds.
[ 1330.152056] Not tainted 4.15.2 #2
[ 1330.152056] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1330.152057] kworker/u24:0 D 0 5 2 0x80000000
[ 1330.152059] Workqueue: events_unbound commit_work
[ 1330.152063] ? __schedule+0x26b/0x840
[ 1330.152065] ? __update_load_avg_se.isra.37+0x1b6/0x1c0
[ 1330.152066] schedule+0x28/0x80
[ 1330.152067] schedule_timeout+0x1de/0x360
[ 1330.152108] ? dce110_timing_generator_get_position+0x51/0x60 [amdgpu]
[ 1330.152130] ? dce110_timing_generator_get_crtc_scanoutpos+0x6b/0xa0 [amdgpu]
[ 1330.152132] dma_fence_default_wait+0x1f6/0x280
[ 1330.152133] ? dma_fence_release+0x90/0x90
[ 1330.152134] dma_fence_wait_timeout+0x33/0xe0
[ 1330.152136] reservation_object_wait_timeout_rcu+0x198/0x340
[ 1330.152158] amdgpu_dm_do_flip+0x112/0x350 [amdgpu]
[ 1330.152179] amdgpu_dm_atomic_commit_tail+0x8a4/0x9a0 [amdgpu]
[ 1330.152180] ? pick_next_task_fair+0x14f/0x5f0
[ 1330.152181] commit_tail+0x3a/0x70
[ 1330.152183] process_one_work+0x17c/0x370
[ 1330.152184] worker_thread+0x2e/0x370
[ 1330.152185] ? process_one_work+0x370/0x370
[ 1330.152186] kthread+0x111/0x130
[ 1330.152187] ? kthread_create_worker_on_cpu+0x70/0x70
[ 1330.152188] ret_from_fork+0x1f/0x30
[ 1330.152196] INFO: task mpv/vo:3113 blocked for more than 120 seconds.
[ 1330.152197] Not tainted 4.15.2 #2
[ 1330.152197] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1330.152198] mpv/vo D 0 3113 2983 0x80000006
[ 1330.152200] ? __schedule+0x26b/0x840
[ 1330.152201] schedule+0x28/0x80
[ 1330.152202] schedule_preempt_disabled+0xa/0x10
[ 1330.152204] __mutex_lock.isra.1+0x18e/0x4c0
[ 1330.152205] ? drm_release+0x36/0x3b0
[ 1330.152206] drm_release+0x36/0x3b0
[ 1330.152208] __fput+0xcd/0x1d0
[ 1330.152210] task_work_run+0x7b/0xa0
[ 1330.152211] do_exit+0x2d0/0xb10
[ 1330.152212] ? __check_object_size+0xaf/0x1b0
[ 1330.152214] ? _copy_to_user+0x22/0x30
[ 1330.152215] ? drm_ioctl+0x2ee/0x380
[ 1330.152216] do_group_exit+0x3a/0xa0
[ 1330.152217] get_signal+0x260/0x560
[ 1330.152219] do_signal+0x36/0x690
[ 1330.152231] ? amdgpu_drm_ioctl+0x6c/0x80 [amdgpu]
[ 1330.152233] ? do_vfs_ioctl+0xa1/0x610
[ 1330.152234] exit_to_usermode_loop+0x58/0x90
[ 1330.152235] do_syscall_64+0xe8/0xf0
[ 1330.152236] entry_SYSCALL_64_after_hwframe+0x21/0x86
[ 1330.152238] RIP: 0033:0x7f95a1036e6b
[ 1330.152238] RSP: 002b:00007f959b0fa0b0 EFLAGS: 00000293 ORIG_RAX: 0000000000000007
[ 1330.152239] RAX: fffffffffffffdfc RBX: 00007f959b0fa0f0 RCX: 00007f95a1036e6b
[ 1330.152240] RDX: ffffffffffffffff RSI: 0000000000000001 RDI: 00007f959b0fa0f0
[ 1330.152240] RBP: 0000000000000001 R08: 0000000000000000 R09: 00007f959b0fa400
[ 1330.152241] R10: 0000000000000106 R11: 0000000000000293 R12: 00007f95940376d8
[ 1330.152241] R13: 00007f95943e95a8 R14: 00000000ffffffff R15: 00007f959b0fa0f0
[ 1330.152243] INFO: task ffmpeg_g:3143 blocked for more than 120 seconds.
[ 1330.152243] Not tainted 4.15.2 #2
[ 1330.152244] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1330.152244] ffmpeg_g D 0 3143 2402 0x80000006
[ 1330.152246] ? __schedule+0x26b/0x840
[ 1330.152247] schedule+0x28/0x80
[ 1330.152267] amd_sched_entity_push_job+0xa3/0xf0 [amdgpu]
[ 1330.152269] ? finish_wait+0x80/0x80
[ 1330.152288] amdgpu_job_submit+0x9c/0xc0 [amdgpu]
[ 1330.152303] amdgpu_vm_bo_update_mapping+0x383/0x3f0 [amdgpu]
[ 1330.152318] ? amdgpu_vm_free_mapping.isra.20+0x20/0x20 [amdgpu]
[ 1330.152331] amdgpu_vm_clear_freed+0xbb/0x190 [amdgpu]
[ 1330.152345] amdgpu_gem_object_close+0x19c/0x210 [amdgpu]
[ 1330.152348] ? drm_gem_object_release_handle+0x2c/0x90
[ 1330.152349] drm_gem_object_release_handle+0x2c/0x90
[ 1330.152350] ? drm_gem_object_handle_put_unlocked+0xb0/0xb0
[ 1330.152352] idr_for_each+0x48/0xe0
[ 1330.152353] drm_gem_release+0x1c/0x30
[ 1330.152354] drm_release+0x342/0x3b0
[ 1330.152356] __fput+0xcd/0x1d0
[ 1330.152357] task_work_run+0x7b/0xa0
[ 1330.152358] do_exit+0x2d0/0xb10
[ 1330.152359] do_group_exit+0x3a/0xa0
[ 1330.152360] get_signal+0x260/0x560
[ 1330.152361] do_signal+0x36/0x690
[ 1330.152363] ? __vma_rb_erase+0x1f6/0x270
[ 1330.152364] ? SyS_futex+0x12d/0x180
[ 1330.152365] exit_to_usermode_loop+0x58/0x90
[ 1330.152366] do_syscall_64+0xe8/0xf0
[ 1330.152367] entry_SYSCALL_64_after_hwframe+0x21/0x86
[ 1330.152368] RIP: 0033:0x7f60bb3df7dd
[ 1330.152368] RSP: 002b:00007f60927fbdd0 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
[ 1330.152369] RAX: fffffffffffffe00 RBX: 0000557f96700178 RCX: 00007f60bb3df7dd
[ 1330.152370] RDX: 0000000000000000 RSI: 0000000000000080 RDI: 0000557f967001a4
[ 1330.152370] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000557f96879778
[ 1330.152371] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000033
[ 1330.152371] R13: 0000557f96700208 R14: 0000000000000000 R15: 0000557f967001a4
Is that known? Is there anything else I can do with it?
- Mark
Mark Thompson
2018-02-13 22:03:30 UTC
Permalink
Post by James Zhu
Hi Mark,
Did you still encounter hung issue?
If yes, could you share me with your play and transcode streams and command line,
then I can try to reproduce at my side.
Thanks & Best Regards!
James Zhu
Yes, it does still happen with the latest patches and vanilla kernel 4.15.2, on an RX 460 / Polaris 11.


To reproduce:

Take a normal 1080p H.264 input file (I tried a few different ones and it didn't change anything; if you want something exactly the same then the usual Big Buck Bunny video was among those tested).

Use the GPU to play back the video with mpv in a normal X session running on the AMD card (I'm running this via ssh in an otherwise-empty X instance):

mpv --fs --loop --no-audio --vo gpu --gpu-context=x11egl --hwdec=vaapi bbb_1080_264.mp4

Then transcode it to H.265 on the same device at the same time:

ffmpeg -y -hwaccel vaapi -hwaccel_device /dev/dri/renderD129 -hwaccel_output_format vaapi -i bbb_1080_264.mp4 -an -c:v hevc_vaapi -bf 0 out.mp4

and the GPU locks up completely very quickly (within a few seconds / a few hundred frames of starting).

That leaves unkillable zombie processes of everything which was touching the GPU at the time it died:

$ ps aux | grep [d]efunct
root 6994 0.4 0.0 0 0 ? Zsl 20:43 0:22 [Xorg] <defunct>
mrt 20601 0.3 0.0 0 0 ? Zl 21:50 0:02 [mpv] <defunct>
mrt 20630 0.0 0.0 0 0 ? Zl 21:51 0:00 [ffmpeg_g] <defunct>


To compare, encoding H.264 instead of H.265 at the same time with:

ffmpeg -y -hwaccel vaapi -hwaccel_device /dev/dri/renderD129 -hwaccel_output_format vaapi -i bbb_1080_264.mp4 -an -c:v h264_vaapi -profile constrained_baseline -bf 0 out.mp4

does not fail.


Thanks,

- Mark



Kernel messages:

[279612.955929] INFO: task kworker/u24:3:20617 blocked for more than 120 seconds.
[279612.955936] Not tainted 4.15.2 #2
[279612.955939] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[279612.955943] kworker/u24:3 D 0 20617 2 0x80000000
[279612.955957] Workqueue: events_unbound commit_work
[279612.955961] Call Trace:
[279612.955975] ? __schedule+0x26b/0x840
[279612.955982] schedule+0x28/0x80
[279612.955987] schedule_timeout+0x1de/0x360
[279612.956123] ? dce110_timing_generator_get_position+0x51/0x60 [amdgpu]
[279612.956246] ? dce110_timing_generator_get_crtc_scanoutpos+0x6b/0xa0 [amdgpu]
[279612.956254] dma_fence_default_wait+0x1f6/0x280
[279612.956261] ? dma_fence_release+0x90/0x90
[279612.956267] dma_fence_wait_timeout+0x33/0xe0
[279612.956274] reservation_object_wait_timeout_rcu+0x198/0x340
[279612.956396] amdgpu_dm_do_flip+0x112/0x350 [amdgpu]
[279612.956514] amdgpu_dm_atomic_commit_tail+0x8a4/0x9a0 [amdgpu]
[279612.956521] ? pick_next_task_fair+0x14f/0x5f0
[279612.956528] commit_tail+0x3a/0x70
[279612.956534] process_one_work+0x17c/0x370
[279612.956540] worker_thread+0x2e/0x370
[279612.956545] ? process_one_work+0x370/0x370
[279612.956551] kthread+0x111/0x130
[279612.956558] ? kthread_create_worker_on_cpu+0x70/0x70
[279612.956564] ret_from_fork+0x1f/0x30
[279733.790840] INFO: task amdgpu_cs:0:20607 blocked for more than 120 seconds.
[279733.790848] Not tainted 4.15.2 #2
[279733.790850] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[279733.790854] amdgpu_cs:0 D 0 20607 20087 0x80000002
[279733.790861] Call Trace:
[279733.790876] ? __schedule+0x26b/0x840
[279733.790883] schedule+0x28/0x80
[279733.790890] schedule_preempt_disabled+0xa/0x10
[279733.790898] __mutex_lock.isra.1+0x18e/0x4c0
[279733.790906] ? __slab_free+0x14b/0x300
[279733.790915] ? drm_release+0x36/0x3b0
[279733.790920] drm_release+0x36/0x3b0
[279733.790929] __fput+0xcd/0x1d0
[279733.790937] task_work_run+0x7b/0xa0
[279733.790943] do_exit+0x2d0/0xb10
[279733.790948] ? __check_object_size+0xaf/0x1b0
[279733.790954] do_group_exit+0x3a/0xa0
[279733.790960] get_signal+0x260/0x560
[279733.790968] do_signal+0x36/0x690
[279733.791053] ? amdgpu_drm_ioctl+0x6c/0x80 [amdgpu]
[279733.791060] ? do_vfs_ioctl+0xa1/0x610
[279733.791066] ? SyS_futex+0x12d/0x180
[279733.791072] exit_to_usermode_loop+0x58/0x90
[279733.791077] do_syscall_64+0xe8/0xf0
[279733.791082] entry_SYSCALL_64_after_hwframe+0x21/0x86
[279733.791088] RIP: 0033:0x7f769b8f27dd
[279733.791091] RSP: 002b:00007f768b6bbd70 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
[279733.791097] RAX: fffffffffffffe00 RBX: 00007f76902db2f0 RCX: 00007f769b8f27dd
[279733.791100] RDX: 0000000000000000 RSI: 0000000000000080 RDI: 00007f76902db318
[279733.791103] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
[279733.791106] R10: 0000000000000000 R11: 0000000000000246 R12: 00000000000019f0
[279733.791109] R13: 00007f76902db2c8 R14: 0000000000000000 R15: 00007f76902db318
[279733.791115] INFO: task kworker/u24:3:20617 blocked for more than 120 seconds.
[279733.791119] Not tainted 4.15.2 #2
[279733.791121] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[279733.791124] kworker/u24:3 D 0 20617 2 0x80000000
[279733.791134] Workqueue: events_unbound commit_work
[279733.791138] Call Trace:
[279733.791145] ? __schedule+0x26b/0x840
[279733.791152] schedule+0x28/0x80
[279733.791156] schedule_timeout+0x1de/0x360
[279733.791282] ? dce110_timing_generator_get_position+0x51/0x60 [amdgpu]
[279733.791403] ? dce110_timing_generator_get_crtc_scanoutpos+0x6b/0xa0 [amdgpu]
[279733.791411] dma_fence_default_wait+0x1f6/0x280
[279733.791417] ? dma_fence_release+0x90/0x90
[279733.791423] dma_fence_wait_timeout+0x33/0xe0
[279733.791430] reservation_object_wait_timeout_rcu+0x198/0x340
[279733.791552] amdgpu_dm_do_flip+0x112/0x350 [amdgpu]
[279733.791673] amdgpu_dm_atomic_commit_tail+0x8a4/0x9a0 [amdgpu]
[279733.791680] ? pick_next_task_fair+0x14f/0x5f0
[279733.791686] commit_tail+0x3a/0x70
[279733.791692] process_one_work+0x17c/0x370
[279733.791697] worker_thread+0x2e/0x370
[279733.791702] ? process_one_work+0x370/0x370
[279733.791709] kthread+0x111/0x130
[279733.791715] ? kthread_create_worker_on_cpu+0x70/0x70
[279733.791721] ret_from_fork+0x1f/0x30
[279733.791728] INFO: task ffmpeg_g:20642 blocked for more than 120 seconds.
[279733.791731] Not tainted 4.15.2 #2
[279733.791733] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[279733.791736] ffmpeg_g D 0 20642 7139 0x80000006
[279733.791741] Call Trace:
[279733.791748] ? __schedule+0x26b/0x840
[279733.791755] schedule+0x28/0x80
[279733.791864] amd_sched_entity_push_job+0xa3/0xf0 [amdgpu]
[279733.791873] ? finish_wait+0x80/0x80
[279733.791977] amdgpu_job_submit+0x9c/0xc0 [amdgpu]
[279733.792062] amdgpu_vm_bo_update_mapping+0x383/0x3f0 [amdgpu]
[279733.792145] ? amdgpu_vm_free_mapping.isra.20+0x20/0x20 [amdgpu]
[279733.792225] amdgpu_vm_clear_freed+0xbb/0x190 [amdgpu]
[279733.792301] amdgpu_gem_object_close+0x19c/0x210 [amdgpu]
[279733.792313] ? drm_gem_object_release_handle+0x2c/0x90
[279733.792320] drm_gem_object_release_handle+0x2c/0x90
[279733.792327] ? drm_gem_object_handle_put_unlocked+0xb0/0xb0
[279733.792332] idr_for_each+0x48/0xe0
[279733.792340] drm_gem_release+0x1c/0x30
[279733.792346] drm_release+0x342/0x3b0
[279733.792353] __fput+0xcd/0x1d0
[279733.792360] task_work_run+0x7b/0xa0
[279733.792365] do_exit+0x2d0/0xb10
[279733.792371] do_group_exit+0x3a/0xa0
[279733.792376] get_signal+0x260/0x560
[279733.792384] do_signal+0x36/0x690
[279733.792392] ? __vma_rb_erase+0x1f6/0x270
[279733.792398] ? SyS_futex+0x12d/0x180
[279733.792403] exit_to_usermode_loop+0x58/0x90
[279733.792408] do_syscall_64+0xe8/0xf0
[279733.792413] entry_SYSCALL_64_after_hwframe+0x21/0x86
[279733.792417] RIP: 0033:0x7f77a8ab37dd
[279733.792421] RSP: 002b:00007f778affcdd0 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
[279733.792426] RAX: fffffffffffffe00 RBX: 0000556b00c44718 RCX: 00007f77a8ab37dd
[279733.792429] RDX: 0000000000000000 RSI: 0000000000000080 RDI: 0000556b00c44740
[279733.792432] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000556b00ce59b8
[279733.792435] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000014
[279733.792437] R13: 0000556b00c447a8 R14: 0000000000000000 R15: 0000556b00c44740
Mark Thompson
2018-02-16 18:31:01 UTC
Permalink
Post by James Zhu
Hi Mark,
I couldn't reproduce the issue on my Polaris 11 to run mpv / ffmpeg about 1.5 hours.
ffmpeg -y -hwaccel vaapi -hwaccel_device /dev/dri/renderD128 -hwaccel_output_format vaapi -i video/Mr.Right.mp4 -an -c:v hevc_vaapi -bf 0 out.mp4
mpv --fs --loop --no-audio --vo gpu --gpu-context=x11egl --hwdec=vaapi video/Mr.Right.mp4
But it has some failure with vaDeriveImage. I am not  sure if this failure matters, the video still can play without any other error,
If it's calling vaDeriveImage() at all that suggests it isn't using the proper interop path, and may be falling back to software decode. This should work in recent versions of mpv with git Mesa and libva - maybe have a look at the verbose output and see what it's actually doing?
Post by James Zhu
mpv --fs --loop --no-audio --vo vaapi  --hwdec=vaapi video/Mr.Right.mp4
No error reported with this command line.
I haven't tried the legacy VAAPI test output, I'll try later to see if that also triggers the failure for me.


I don't think that this sort of issue should block the patches in Mesa because it looks likely that it is a kernel issue somehow - userspace shouldn't be able to nuke the GPU at all. Still, the feature is essentially unusable for me because of this problem, and I imagine it will apply to at least some other people with setups which match mine in some way as yet unknown.

Thanks,

- Mark
James Zhu
2018-02-19 12:00:05 UTC
Permalink
Post by Mark Thompson
Post by James Zhu
Hi Mark,
I couldn't reproduce the issue on my Polaris 11 to run mpv / ffmpeg about 1.5 hours.
ffmpeg -y -hwaccel vaapi -hwaccel_device /dev/dri/renderD128 -hwaccel_output_format vaapi -i video/Mr.Right.mp4 -an -c:v hevc_vaapi -bf 0 out.mp4
mpv --fs --loop --no-audio --vo gpu --gpu-context=x11egl --hwdec=vaapi video/Mr.Right.mp4
But it has some failure with vaDeriveImage. I am not  sure if this failure matters, the video still can play without any other error,
If it's calling vaDeriveImage() at all that suggests it isn't using the proper interop path, and may be falling back to software decode. This should work in recent versions of mpv with git Mesa and libva - maybe have a look at the verbose output and see what it's actually doing?
I think you are right, it should fall back to software decode. During
the weekend test, my system hung also with legacy VAAPI test output setting.
Post by Mark Thompson
Post by James Zhu
mpv --fs --loop --no-audio --vo vaapi  --hwdec=vaapi video/Mr.Right.mp4
No error reported with this command line.
I haven't tried the legacy VAAPI test output, I'll try later to see if that also triggers the failure for me.
I don't think that this sort of issue should block the patches in Mesa because it looks likely that it is a kernel issue somehow - userspace shouldn't be able to nuke the GPU at all. Still, the feature is essentially unusable for me because of this problem, and I imagine it will apply to at least some other people with setups which match mine in some way as yet unknown.
Yeah, if there are no more comments from the community, we will push the
patches upstream tomorrow.
Post by Mark Thompson
Thanks,
- Mark
Loading...