Import libdrm 2.4.79

jsg 2017-04-14 07:45:34 +00:00
parent 99c4447fac
commit 02337797c5
33 changed files with 711 additions and 368 deletions

View File

@@ -22,6 +22,7 @@ amdgpu_bo_list_update
amdgpu_bo_query_info
amdgpu_bo_set_metadata
amdgpu_bo_va_op
amdgpu_bo_va_op_raw
amdgpu_bo_wait_for_idle
amdgpu_create_bo_from_user_mem
amdgpu_cs_create_semaphore
@@ -45,6 +46,7 @@ amdgpu_query_heap_info
amdgpu_query_hw_ip_count
amdgpu_query_hw_ip_info
amdgpu_query_info
amdgpu_query_sensor_info
amdgpu_read_mm_registers
amdgpu_va_range_alloc
amdgpu_va_range_free

View File

@@ -1058,6 +1058,24 @@ int amdgpu_query_info(amdgpu_device_handle dev, unsigned info_id,
int amdgpu_query_gds_info(amdgpu_device_handle dev,
struct amdgpu_gds_resource_info *gds_info);
/**
* Query information about a sensor.
*
* The return size is query-specific and depends on the "sensor_type"
* parameter. No more than "size" bytes are returned.
*
* \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
* \param sensor_type - \c [in] AMDGPU_INFO_SENSOR_*
* \param size - \c [in] Size of the returned value.
* \param value - \c [out] Pointer to the return value.
*
* \return 0 on success\n
* <0 - Negative POSIX Error code
*
*/
int amdgpu_query_sensor_info(amdgpu_device_handle dev, unsigned sensor_type,
unsigned size, void *value);
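A minimal usage sketch of the new entry point (not part of this diff; assumes an initialized device handle, and that the GPU_TEMP subquery reports millidegrees Celsius):

/* Illustrative only: read the GPU temperature via the new wrapper. */
uint32_t temp = 0;
int r = amdgpu_query_sensor_info(dev, AMDGPU_INFO_SENSOR_GPU_TEMP,
				 sizeof(temp), &temp);
if (!r)
	printf("GPU temperature: %u.%03u C\n", temp / 1000, temp % 1000);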
/**
* Read a set of consecutive memory-mapped registers.
* Not all registers are allowed to be read by userspace.
@@ -1185,6 +1203,34 @@ int amdgpu_bo_va_op(amdgpu_bo_handle bo,
uint64_t flags,
uint32_t ops);
/**
* VA mapping/unmapping for a buffer object or PRT region.
*
* This is not a simple drop-in extension for amdgpu_bo_va_op; instead, all
* parameters are treated "raw", i.e. size is not automatically aligned, and
* all flags must be specified explicitly.
*
* \param dev - \c [in] device handle
* \param bo - \c [in] BO handle (may be NULL)
* \param offset - \c [in] Start offset to map
* \param size - \c [in] Size to map
* \param addr - \c [in] Start virtual address.
* \param flags - \c [in] Supported flags for mapping/unmapping
* \param ops - \c [in] AMDGPU_VA_OP_MAP or AMDGPU_VA_OP_UNMAP
*
* \return 0 on success\n
* <0 - Negative POSIX Error code
*
*/
int amdgpu_bo_va_op_raw(amdgpu_device_handle dev,
amdgpu_bo_handle bo,
uint64_t offset,
uint64_t size,
uint64_t addr,
uint64_t flags,
uint32_t ops);
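A usage sketch (not from the tree; assumes dev, bo, and an aligned va already exist): map one page of a BO with fully explicit flags, then tear the mapping down. The flag choice is illustrative, not prescriptive.

/* Illustrative only: nothing is aligned or defaulted for you with the raw op. */
r = amdgpu_bo_va_op_raw(dev, bo, 0, 4096, va,
			AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE,
			AMDGPU_VA_OP_MAP);
if (!r)
	r = amdgpu_bo_va_op_raw(dev, bo, 0, 4096, va, 0, AMDGPU_VA_OP_UNMAP);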
/**
* create semaphore
*

View File

@@ -131,10 +131,8 @@ static int amdgpu_get_auth(int fd, int *auth)
static void amdgpu_device_free_internal(amdgpu_device_handle dev)
{
amdgpu_vamgr_deinit(dev->vamgr);
free(dev->vamgr);
amdgpu_vamgr_deinit(dev->vamgr_32);
free(dev->vamgr_32);
amdgpu_vamgr_deinit(&dev->vamgr_32);
amdgpu_vamgr_deinit(&dev->vamgr);
util_hash_table_destroy(dev->bo_flink_names);
util_hash_table_destroy(dev->bo_handles);
pthread_mutex_destroy(&dev->bo_table_mutex);
@@ -255,25 +253,18 @@ int amdgpu_device_initialize(int fd,
if (r)
goto cleanup;
dev->vamgr = calloc(1, sizeof(struct amdgpu_bo_va_mgr));
if (dev->vamgr == NULL)
goto cleanup;
amdgpu_vamgr_init(dev->vamgr, dev->dev_info.virtual_address_offset,
amdgpu_vamgr_init(&dev->vamgr, dev->dev_info.virtual_address_offset,
dev->dev_info.virtual_address_max,
dev->dev_info.virtual_address_alignment);
max = MIN2(dev->dev_info.virtual_address_max, 0xffffffff);
start = amdgpu_vamgr_find_va(dev->vamgr,
start = amdgpu_vamgr_find_va(&dev->vamgr,
max - dev->dev_info.virtual_address_offset,
dev->dev_info.virtual_address_alignment, 0);
if (start > 0xffffffff)
goto free_va; /* shouldn't get here */
dev->vamgr_32 = calloc(1, sizeof(struct amdgpu_bo_va_mgr));
if (dev->vamgr_32 == NULL)
goto free_va;
amdgpu_vamgr_init(dev->vamgr_32, start, max,
amdgpu_vamgr_init(&dev->vamgr_32, start, max,
dev->dev_info.virtual_address_alignment);
*major_version = dev->major_version;
@@ -286,10 +277,9 @@ int amdgpu_device_initialize(int fd,
free_va:
r = -ENOMEM;
amdgpu_vamgr_free_va(dev->vamgr, start,
amdgpu_vamgr_free_va(&dev->vamgr, start,
max - dev->dev_info.virtual_address_offset);
amdgpu_vamgr_deinit(dev->vamgr);
free(dev->vamgr);
amdgpu_vamgr_deinit(&dev->vamgr);
cleanup:
if (dev->fd >= 0)

View File

@@ -169,54 +169,58 @@ drm_private int amdgpu_query_gpu_info_init(amdgpu_device_handle dev)
dev->info.vce_harvest_config = dev->dev_info.vce_harvest_config;
dev->info.pci_rev_id = dev->dev_info.pci_rev;
for (i = 0; i < (int)dev->info.num_shader_engines; i++) {
unsigned instance = (i << AMDGPU_INFO_MMR_SE_INDEX_SHIFT) |
(AMDGPU_INFO_MMR_SH_INDEX_MASK <<
AMDGPU_INFO_MMR_SH_INDEX_SHIFT);
if (dev->info.family_id < AMDGPU_FAMILY_AI) {
for (i = 0; i < (int)dev->info.num_shader_engines; i++) {
unsigned instance = (i << AMDGPU_INFO_MMR_SE_INDEX_SHIFT) |
(AMDGPU_INFO_MMR_SH_INDEX_MASK <<
AMDGPU_INFO_MMR_SH_INDEX_SHIFT);
r = amdgpu_read_mm_registers(dev, 0x263d, 1, instance, 0,
&dev->info.backend_disable[i]);
if (r)
return r;
/* extract bitfield CC_RB_BACKEND_DISABLE.BACKEND_DISABLE */
dev->info.backend_disable[i] =
(dev->info.backend_disable[i] >> 16) & 0xff;
r = amdgpu_read_mm_registers(dev, 0x263d, 1, instance, 0,
&dev->info.backend_disable[i]);
if (r)
return r;
/* extract bitfield CC_RB_BACKEND_DISABLE.BACKEND_DISABLE */
dev->info.backend_disable[i] =
(dev->info.backend_disable[i] >> 16) & 0xff;
r = amdgpu_read_mm_registers(dev, 0xa0d4, 1, instance, 0,
&dev->info.pa_sc_raster_cfg[i]);
r = amdgpu_read_mm_registers(dev, 0xa0d4, 1, instance, 0,
&dev->info.pa_sc_raster_cfg[i]);
if (r)
return r;
if (dev->info.family_id >= AMDGPU_FAMILY_CI) {
r = amdgpu_read_mm_registers(dev, 0xa0d5, 1, instance, 0,
&dev->info.pa_sc_raster_cfg1[i]);
if (r)
return r;
}
}
}
r = amdgpu_read_mm_registers(dev, 0x263e, 1, 0xffffffff, 0,
&dev->info.gb_addr_cfg);
if (r)
return r;
if (dev->info.family_id < AMDGPU_FAMILY_AI) {
r = amdgpu_read_mm_registers(dev, 0x2644, 32, 0xffffffff, 0,
dev->info.gb_tile_mode);
if (r)
return r;
if (dev->info.family_id >= AMDGPU_FAMILY_CI) {
r = amdgpu_read_mm_registers(dev, 0xa0d5, 1, instance, 0,
&dev->info.pa_sc_raster_cfg1[i]);
r = amdgpu_read_mm_registers(dev, 0x2664, 16, 0xffffffff, 0,
dev->info.gb_macro_tile_mode);
if (r)
return r;
}
}
r = amdgpu_read_mm_registers(dev, 0x2644, 32, 0xffffffff, 0,
dev->info.gb_tile_mode);
if (r)
return r;
if (dev->info.family_id >= AMDGPU_FAMILY_CI) {
r = amdgpu_read_mm_registers(dev, 0x2664, 16, 0xffffffff, 0,
dev->info.gb_macro_tile_mode);
r = amdgpu_read_mm_registers(dev, 0x9d8, 1, 0xffffffff, 0,
&dev->info.mc_arb_ramcfg);
if (r)
return r;
}
r = amdgpu_read_mm_registers(dev, 0x263e, 1, 0xffffffff, 0,
&dev->info.gb_addr_cfg);
if (r)
return r;
r = amdgpu_read_mm_registers(dev, 0x9d8, 1, 0xffffffff, 0,
&dev->info.mc_arb_ramcfg);
if (r)
return r;
dev->info.cu_active_number = dev->dev_info.cu_active_number;
dev->info.cu_ao_mask = dev->dev_info.cu_ao_mask;
memcpy(&dev->info.cu_bitmap[0][0], &dev->dev_info.cu_bitmap[0][0], sizeof(dev->info.cu_bitmap));
@@ -314,3 +318,18 @@ int amdgpu_query_gds_info(amdgpu_device_handle dev,
return 0;
}
int amdgpu_query_sensor_info(amdgpu_device_handle dev, unsigned sensor_type,
unsigned size, void *value)
{
struct drm_amdgpu_info request;
memset(&request, 0, sizeof(request));
request.return_pointer = (uintptr_t)value;
request.return_size = size;
request.query = AMDGPU_INFO_SENSOR;
request.sensor_info.type = sensor_type;
return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request,
sizeof(struct drm_amdgpu_info));
}

View File

@@ -85,9 +85,9 @@ struct amdgpu_device {
struct drm_amdgpu_info_device dev_info;
struct amdgpu_gpu_info info;
/** The global VA manager for the whole virtual address space */
struct amdgpu_bo_va_mgr *vamgr;
struct amdgpu_bo_va_mgr vamgr;
/** The VA manager for the 32bit address space */
struct amdgpu_bo_va_mgr *vamgr_32;
struct amdgpu_bo_va_mgr vamgr_32;
};
struct amdgpu_bo {

View File

@@ -236,9 +236,9 @@ int amdgpu_va_range_alloc(amdgpu_device_handle dev,
struct amdgpu_bo_va_mgr *vamgr;
if (flags & AMDGPU_VA_RANGE_32_BIT)
vamgr = dev->vamgr_32;
vamgr = &dev->vamgr_32;
else
vamgr = dev->vamgr;
vamgr = &dev->vamgr;
va_base_alignment = MAX2(va_base_alignment, vamgr->va_alignment);
size = ALIGN(size, vamgr->va_alignment);
@@ -249,7 +249,7 @@ int amdgpu_va_range_alloc(amdgpu_device_handle dev,
if (!(flags & AMDGPU_VA_RANGE_32_BIT) &&
(*va_base_allocated == AMDGPU_INVALID_VA_ADDRESS)) {
/* fallback to 32bit address */
vamgr = dev->vamgr_32;
vamgr = &dev->vamgr_32;
*va_base_allocated = amdgpu_vamgr_find_va(vamgr, size,
va_base_alignment, va_base_required);
}
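A caller-side sketch of the same two managers (not part of the diff; assumes an initialized device handle): explicitly request a page-sized range from the 32-bit manager.

uint64_t va_base;
amdgpu_va_handle va_handle;
/* Illustrative only: AMDGPU_VA_RANGE_32_BIT routes this to vamgr_32. */
int r = amdgpu_va_range_alloc(dev, amdgpu_gpu_va_range_general,
			      4096, 4096, 0, &va_base, &va_handle,
			      AMDGPU_VA_RANGE_32_BIT);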

View File

@@ -61,32 +61,13 @@ struct etna_gpu *etna_gpu_new(struct etna_device *dev, unsigned int core)
gpu->dev = dev;
gpu->core = core;
/* get specs from kernel space */
gpu->specs.model = get_param(dev, core, ETNAVIV_PARAM_GPU_MODEL);
gpu->specs.revision = get_param(dev, core, ETNAVIV_PARAM_GPU_REVISION);
gpu->specs.features[0] = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_0);
gpu->specs.features[1] = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_1);
gpu->specs.features[2] = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_2);
gpu->specs.features[3] = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_3);
gpu->specs.features[4] = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_4);
gpu->specs.features[5] = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_5);
gpu->specs.features[6] = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_6);
gpu->specs.stream_count = get_param(dev, core, ETNA_GPU_STREAM_COUNT);
gpu->specs.register_max = get_param(dev, core, ETNA_GPU_REGISTER_MAX);
gpu->specs.thread_count = get_param(dev, core, ETNA_GPU_THREAD_COUNT);
gpu->specs.vertex_cache_size = get_param(dev, core, ETNA_GPU_VERTEX_CACHE_SIZE);
gpu->specs.shader_core_count = get_param(dev, core, ETNA_GPU_SHADER_CORE_COUNT);
gpu->specs.pixel_pipes = get_param(dev, core, ETNA_GPU_PIXEL_PIPES);
gpu->specs.vertex_output_buffer_size = get_param(dev, core, ETNA_GPU_VERTEX_OUTPUT_BUFFER_SIZE);
gpu->specs.buffer_size = get_param(dev, core, ETNA_GPU_BUFFER_SIZE);
gpu->specs.instruction_count = get_param(dev, core, ETNA_GPU_INSTRUCTION_COUNT);
gpu->specs.num_constants = get_param(dev, core, ETNA_GPU_NUM_CONSTANTS);
gpu->specs.num_varyings = get_param(dev, core, ETNA_GPU_NUM_VARYINGS);
gpu->model = get_param(dev, core, ETNAVIV_PARAM_GPU_MODEL);
gpu->revision = get_param(dev, core, ETNAVIV_PARAM_GPU_REVISION);
if (!gpu->specs.model)
if (!gpu->model)
goto fail;
INFO_MSG(" GPU model: 0x%x (rev %x)", gpu->specs.model, gpu->specs.revision);
INFO_MSG(" GPU model: 0x%x (rev %x)", gpu->model, gpu->revision);
return gpu;
fail:
@@ -104,66 +85,69 @@ void etna_gpu_del(struct etna_gpu *gpu)
int etna_gpu_get_param(struct etna_gpu *gpu, enum etna_param_id param,
uint64_t *value)
{
struct etna_device *dev = gpu->dev;
unsigned int core = gpu->core;
switch(param) {
case ETNA_GPU_MODEL:
*value = gpu->specs.model;
*value = gpu->model;
return 0;
case ETNA_GPU_REVISION:
*value = gpu->specs.revision;
*value = gpu->revision;
return 0;
case ETNA_GPU_FEATURES_0:
*value = gpu->specs.features[0];
*value = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_0);
return 0;
case ETNA_GPU_FEATURES_1:
*value = gpu->specs.features[1];
*value = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_1);
return 0;
case ETNA_GPU_FEATURES_2:
*value = gpu->specs.features[2];
*value = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_2);
return 0;
case ETNA_GPU_FEATURES_3:
*value = gpu->specs.features[3];
*value = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_3);
return 0;
case ETNA_GPU_FEATURES_4:
*value = gpu->specs.features[4];
*value = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_4);
return 0;
case ETNA_GPU_FEATURES_5:
*value = gpu->specs.features[5];
*value = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_5);
return 0;
case ETNA_GPU_FEATURES_6:
*value = gpu->specs.features[6];
*value = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_6);
return 0;
case ETNA_GPU_STREAM_COUNT:
*value = gpu->specs.stream_count;
*value = get_param(dev, core, ETNA_GPU_STREAM_COUNT);
return 0;
case ETNA_GPU_REGISTER_MAX:
*value = gpu->specs.register_max;
*value = get_param(dev, core, ETNA_GPU_REGISTER_MAX);
return 0;
case ETNA_GPU_THREAD_COUNT:
*value = gpu->specs.thread_count;
*value = get_param(dev, core, ETNA_GPU_THREAD_COUNT);
return 0;
case ETNA_GPU_VERTEX_CACHE_SIZE:
*value = gpu->specs.vertex_cache_size;
*value = get_param(dev, core, ETNA_GPU_VERTEX_CACHE_SIZE);
return 0;
case ETNA_GPU_SHADER_CORE_COUNT:
*value = gpu->specs.shader_core_count;
*value = get_param(dev, core, ETNA_GPU_SHADER_CORE_COUNT);
return 0;
case ETNA_GPU_PIXEL_PIPES:
*value = gpu->specs.pixel_pipes;
*value = get_param(dev, core, ETNA_GPU_PIXEL_PIPES);
return 0;
case ETNA_GPU_VERTEX_OUTPUT_BUFFER_SIZE:
*value = gpu->specs.vertex_output_buffer_size;
*value = get_param(dev, core, ETNA_GPU_VERTEX_OUTPUT_BUFFER_SIZE);
return 0;
case ETNA_GPU_BUFFER_SIZE:
*value = gpu->specs.buffer_size;
*value = get_param(dev, core, ETNA_GPU_BUFFER_SIZE);
return 0;
case ETNA_GPU_INSTRUCTION_COUNT:
*value = gpu->specs.instruction_count;
*value = get_param(dev, core, ETNA_GPU_INSTRUCTION_COUNT);
return 0;
case ETNA_GPU_NUM_CONSTANTS:
*value = gpu->specs.num_constants;
*value = get_param(dev, core, ETNA_GPU_NUM_CONSTANTS);
return 0;
case ETNA_GPU_NUM_VARYINGS:
*value = gpu->specs.num_varyings;
*value = get_param(dev, core, ETNA_GPU_NUM_VARYINGS);
return 0;
default:

View File

@@ -47,25 +47,6 @@
#include "etnaviv_drmif.h"
#include "etnaviv_drm.h"
#define VIV_FEATURES_WORD_COUNT 7
struct etna_specs {
uint32_t model;
uint32_t revision;
uint32_t features[VIV_FEATURES_WORD_COUNT];
uint32_t stream_count;
uint32_t register_max;
uint32_t thread_count;
uint32_t shader_core_count;
uint32_t vertex_cache_size;
uint32_t vertex_output_buffer_size;
uint32_t pixel_pipes;
uint32_t instruction_count;
uint32_t num_constants;
uint32_t num_varyings;
uint32_t buffer_size;
};
struct etna_bo_bucket {
uint32_t size;
struct list_head list;
@@ -134,8 +115,9 @@ struct etna_bo {
struct etna_gpu {
struct etna_device *dev;
struct etna_specs specs;
uint32_t core;
uint32_t model;
uint32_t revision;
};
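With the specs cache dropped from struct etna_gpu, only model and revision stay cached; every other parameter is now a fresh get_param round trip to the kernel. A usage sketch (not from the tree; assumes a valid gpu handle):

uint64_t model;
/* Illustrative only: ETNA_GPU_MODEL is served from the cached field. */
if (!etna_gpu_get_param(gpu, ETNA_GPU_MODEL, &model))
	printf("GPU model: 0x%x\n", (uint32_t)model);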
struct etna_pipe {

View File

@@ -417,7 +417,7 @@ exynos_handle_event(struct exynos_device *dev, struct exynos_event_context *ctx)
i = 0;
while (i < len) {
e = (struct drm_event *) &buffer[i];
e = (struct drm_event *)(buffer + i);
switch (e->type) {
case DRM_EVENT_VBLANK:
if (evctx->version < 1 ||

View File

@@ -292,20 +292,6 @@ static void g2d_set_direction(struct g2d_context *ctx,
g2d_add_cmd(ctx, DST_PAT_DIRECT_REG, dir->val[1]);
}
/*
* g2d_reset - reset fimg2d hardware.
*
* @ctx: a pointer to g2d_context structure.
*
*/
static void g2d_reset(struct g2d_context *ctx)
{
ctx->cmd_nr = 0;
ctx->cmd_buf_nr = 0;
g2d_add_cmd(ctx, SOFT_RESET_REG, 0x01);
}
/*
* g2d_flush - submit all commands and values in user side command buffer
* to command queue aware of fimg2d dma.

View File

@@ -5,6 +5,7 @@ AM_CFLAGS = \
$(WARN_CFLAGS) \
-I$(top_srcdir) \
$(PTHREADSTUBS_CFLAGS) \
$(VALGRIND_CFLAGS) \
-I$(top_srcdir)/include/drm
libdrm_freedreno_la_LTLIBRARIES = libdrm_freedreno.la

View File

@@ -102,6 +102,8 @@ fd_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags)
bo->bo_reuse = TRUE;
pthread_mutex_unlock(&table_lock);
VG_BO_ALLOC(bo);
return bo;
}
@@ -118,6 +120,8 @@ fd_bo_from_handle(struct fd_device *dev, uint32_t handle, uint32_t size)
bo = bo_from_handle(dev, size, handle);
VG_BO_ALLOC(bo);
out_unlock:
pthread_mutex_unlock(&table_lock);
@@ -147,6 +151,8 @@ fd_bo_from_dmabuf(struct fd_device *dev, int fd)
bo = bo_from_handle(dev, size, handle);
VG_BO_ALLOC(bo);
out_unlock:
pthread_mutex_unlock(&table_lock);
@@ -177,8 +183,10 @@ struct fd_bo * fd_bo_from_name(struct fd_device *dev, uint32_t name)
goto out_unlock;
bo = bo_from_handle(dev, req.size, req.handle);
if (bo)
if (bo) {
set_name(bo, name);
VG_BO_ALLOC(bo);
}
out_unlock:
pthread_mutex_unlock(&table_lock);
@@ -213,6 +221,8 @@ out:
/* Called under table_lock */
drm_private void bo_del(struct fd_bo *bo)
{
VG_BO_FREE(bo);
if (bo->map)
drm_munmap(bo->map, bo->size);

View File

@@ -33,7 +33,6 @@
#include "freedreno_drmif.h"
#include "freedreno_priv.h"
drm_private void bo_del(struct fd_bo *bo);
drm_private extern pthread_mutex_t table_lock;
@@ -102,6 +101,7 @@ fd_bo_cache_cleanup(struct fd_bo_cache *cache, time_t time)
if (time && ((time - bo->free_time) <= 1))
break;
VG_BO_OBTAIN(bo);
list_del(&bo->list);
bo_del(bo);
}
@@ -177,6 +177,7 @@ retry:
*size = bucket->size;
bo = find_in_bucket(bucket, flags);
if (bo) {
VG_BO_OBTAIN(bo);
if (bo->funcs->madvise(bo, TRUE) <= 0) {
/* we've lost the backing pages, delete and try again: */
pthread_mutex_lock(&table_lock);
@@ -207,6 +208,7 @@ fd_bo_cache_free(struct fd_bo_cache *cache, struct fd_bo *bo)
clock_gettime(CLOCK_MONOTONIC, &time);
bo->free_time = time.tv_sec;
VG_BO_RELEASE(bo);
list_addtail(&bo->list, &bucket->list);
fd_bo_cache_cleanup(cache, time.tv_sec);

View File

@@ -115,9 +115,10 @@ static void fd_device_del_impl(struct fd_device *dev)
fd_bo_cache_cleanup(&dev->bo_cache, 0);
drmHashDestroy(dev->handle_table);
drmHashDestroy(dev->name_table);
dev->funcs->destroy(dev);
if (dev->closefd)
close(dev->fd);
dev->funcs->destroy(dev);
free(dev);
}
drm_private void fd_device_del_locked(struct fd_device *dev)

View File

@@ -102,6 +102,9 @@ struct fd_device {
struct fd_bo_cache bo_cache;
int closefd; /* call close(fd) upon destruction */
/* just for valgrind: */
int bo_size;
};
drm_private void fd_bo_cache_init(struct fd_bo_cache *cache, int coarse);
@@ -196,4 +199,57 @@ offset_bytes(void *end, void *start)
return ((char *)end) - ((char *)start);
}
#ifdef HAVE_VALGRIND
# include <memcheck.h>
/*
* For tracking the backing memory (if valgrind enabled, we force a mmap
* for the purposes of tracking)
*/
static inline void VG_BO_ALLOC(struct fd_bo *bo)
{
if (bo && RUNNING_ON_VALGRIND) {
VALGRIND_MALLOCLIKE_BLOCK(fd_bo_map(bo), bo->size, 0, 1);
}
}
static inline void VG_BO_FREE(struct fd_bo *bo)
{
VALGRIND_FREELIKE_BLOCK(bo->map, 0);
}
/*
* For tracking bo structs that are in the buffer-cache, so that valgrind
* doesn't attribute ownership to the first one to allocate the recycled
* bo.
*
* Note that the list_head in fd_bo is used to track the buffers in cache
* so disable error reporting on the range while they are in cache so
* valgrind doesn't squawk about list traversal.
*
*/
static inline void VG_BO_RELEASE(struct fd_bo *bo)
{
if (RUNNING_ON_VALGRIND) {
VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(bo, bo->dev->bo_size);
VALGRIND_MAKE_MEM_NOACCESS(bo, bo->dev->bo_size);
VALGRIND_FREELIKE_BLOCK(bo->map, 0);
}
}
static inline void VG_BO_OBTAIN(struct fd_bo *bo)
{
if (RUNNING_ON_VALGRIND) {
VALGRIND_MAKE_MEM_DEFINED(bo, bo->dev->bo_size);
VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(bo, bo->dev->bo_size);
VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, 1);
}
}
#else
static inline void VG_BO_ALLOC(struct fd_bo *bo) {}
static inline void VG_BO_FREE(struct fd_bo *bo) {}
static inline void VG_BO_RELEASE(struct fd_bo *bo) {}
static inline void VG_BO_OBTAIN(struct fd_bo *bo) {}
#endif
#endif /* FREEDRENO_PRIV_H_ */

View File

@@ -61,5 +61,7 @@ drm_private struct fd_device * kgsl_device_new(int fd)
dev = &kgsl_dev->base;
dev->funcs = &funcs;
dev->bo_size = sizeof(struct kgsl_bo);
return dev;
}

View File

@@ -64,5 +64,7 @@ drm_private struct fd_device * msm_device_new(int fd)
fd_bo_cache_init(&msm_dev->ring_cache, TRUE);
dev->bo_size = sizeof(struct msm_bo);
return dev;
}

View File

@@ -496,11 +496,16 @@ static void msm_ringbuffer_emit_reloc(struct fd_ringbuffer *ring,
if (ring->pipe->gpu_id >= 500) {
struct drm_msm_gem_submit_reloc *reloc_hi;
/* NOTE: grab reloc_idx *before* APPEND() since that could
* realloc() meaning that 'reloc' ptr is no longer valid:
*/
uint32_t reloc_idx = reloc->reloc_idx;
idx = APPEND(cmd, relocs);
reloc_hi = &cmd->relocs[idx];
reloc_hi->reloc_idx = reloc->reloc_idx;
reloc_hi->reloc_idx = reloc_idx;
reloc_hi->reloc_offset = r->offset;
reloc_hi->or = r->orhi;
reloc_hi->shift = r->shift - 32;
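The shape of the bug being fixed, reduced to a sketch with hypothetical names (not from the tree): a pointer into a growable array must not be dereferenced after an append that may realloc() the array.

uint32_t saved = p->field;	/* copy out before the array can move  */
idx = append(&array);		/* may realloc(); 'p' is now stale     */
array[idx].field = saved;	/* use the saved copy, not p->field    */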

View File

@@ -67,6 +67,8 @@ That said, it's up to the individual developers to sync with newer version
When and how to update these files
----------------------------------
Note: One should not do _any_ changes to the files apart from the steps below.
In order to update the files do the following:
- Switch to a Linux kernel tree/branch which is not rebased.
For example: airlied/drm-next
@@ -94,10 +96,6 @@ Status: ?
Promote to fixed size ints, which match the current (32bit) ones.
amdgpu_drm.h
- Using the stdint.h uint*_t over the respective __u* ones
Status: Trivial.
drm_mode.h
- Missing DPI encode/connector pair.
Status: Trivial.

View File

@@ -50,6 +50,7 @@ extern "C" {
#define DRM_AMDGPU_WAIT_CS 0x09
#define DRM_AMDGPU_GEM_OP 0x10
#define DRM_AMDGPU_GEM_USERPTR 0x11
#define DRM_AMDGPU_WAIT_FENCES 0x12
#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -63,6 +64,7 @@ extern "C" {
#define DRM_IOCTL_AMDGPU_WAIT_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs)
#define DRM_IOCTL_AMDGPU_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
#define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
#define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
#define AMDGPU_GEM_DOMAIN_CPU 0x1
#define AMDGPU_GEM_DOMAIN_GTT 0x2
@@ -79,22 +81,26 @@ extern "C" {
#define AMDGPU_GEM_CREATE_CPU_GTT_USWC (1 << 2)
/* Flag that the memory should be in VRAM and cleared */
#define AMDGPU_GEM_CREATE_VRAM_CLEARED (1 << 3)
/* Flag that create shadow bo(GTT) while allocating vram bo */
#define AMDGPU_GEM_CREATE_SHADOW (1 << 4)
/* Flag that allocating the BO should use linear VRAM */
#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5)
struct drm_amdgpu_gem_create_in {
/** the requested memory size */
uint64_t bo_size;
__u64 bo_size;
/** physical start_addr alignment in bytes for some HW requirements */
uint64_t alignment;
__u64 alignment;
/** the requested memory domains */
uint64_t domains;
__u64 domains;
/** allocation flags */
uint64_t domain_flags;
__u64 domain_flags;
};
struct drm_amdgpu_gem_create_out {
/** returned GEM object handle */
uint32_t handle;
uint32_t _pad;
__u32 handle;
__u32 _pad;
};
union drm_amdgpu_gem_create {
@@ -111,28 +117,28 @@ union drm_amdgpu_gem_create {
struct drm_amdgpu_bo_list_in {
/** Type of operation */
uint32_t operation;
__u32 operation;
/** Handle of list or 0 if we want to create one */
uint32_t list_handle;
__u32 list_handle;
/** Number of BOs in list */
uint32_t bo_number;
__u32 bo_number;
/** Size of each element describing BO */
uint32_t bo_info_size;
__u32 bo_info_size;
/** Pointer to array describing BOs */
uint64_t bo_info_ptr;
__u64 bo_info_ptr;
};
struct drm_amdgpu_bo_list_entry {
/** Handle of BO */
uint32_t bo_handle;
__u32 bo_handle;
/** New (if specified) BO priority to be used during migration */
uint32_t bo_priority;
__u32 bo_priority;
};
struct drm_amdgpu_bo_list_out {
/** Handle of resource list */
uint32_t list_handle;
uint32_t _pad;
__u32 list_handle;
__u32 _pad;
};
union drm_amdgpu_bo_list {
@@ -156,26 +162,26 @@ union drm_amdgpu_bo_list {
struct drm_amdgpu_ctx_in {
/** AMDGPU_CTX_OP_* */
uint32_t op;
__u32 op;
/** For future use, no flags defined so far */
uint32_t flags;
uint32_t ctx_id;
uint32_t _pad;
__u32 flags;
__u32 ctx_id;
__u32 _pad;
};
union drm_amdgpu_ctx_out {
struct {
uint32_t ctx_id;
uint32_t _pad;
__u32 ctx_id;
__u32 _pad;
} alloc;
struct {
/** For future use, no flags defined so far */
uint64_t flags;
__u64 flags;
/** Number of resets caused by this context so far. */
uint32_t hangs;
__u32 hangs;
/** Reset status since the last call of the ioctl. */
uint32_t reset_status;
__u32 reset_status;
} state;
};
@@ -195,14 +201,15 @@ union drm_amdgpu_ctx {
#define AMDGPU_GEM_USERPTR_REGISTER (1 << 3)
struct drm_amdgpu_gem_userptr {
uint64_t addr;
uint64_t size;
__u64 addr;
__u64 size;
/* AMDGPU_GEM_USERPTR_* */
uint32_t flags;
__u32 flags;
/* Resulting GEM handle */
uint32_t handle;
__u32 handle;
};
/* SI-CI-VI: */
/* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */
#define AMDGPU_TILING_ARRAY_MODE_SHIFT 0
#define AMDGPU_TILING_ARRAY_MODE_MASK 0xf
@@ -221,10 +228,15 @@ struct drm_amdgpu_gem_userptr {
#define AMDGPU_TILING_NUM_BANKS_SHIFT 21
#define AMDGPU_TILING_NUM_BANKS_MASK 0x3
/* GFX9 and later: */
#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0
#define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f
/* Set/Get helpers for tiling flags. */
#define AMDGPU_TILING_SET(field, value) \
(((value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
(((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
#define AMDGPU_TILING_GET(value, field) \
(((value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)
(((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)
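A quick sketch of the helpers with the new GFX9 field (the swizzle value is an arbitrary example):

__u64 tiling = AMDGPU_TILING_SET(SWIZZLE_MODE, 9);		/* compose */
__u32 swizzle = AMDGPU_TILING_GET(tiling, SWIZZLE_MODE);	/* decode  */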
#define AMDGPU_GEM_METADATA_OP_SET_METADATA 1
#define AMDGPU_GEM_METADATA_OP_GET_METADATA 2
@@ -232,28 +244,28 @@ struct drm_amdgpu_gem_userptr {
/** The same structure is shared for input/output */
struct drm_amdgpu_gem_metadata {
/** GEM Object handle */
uint32_t handle;
__u32 handle;
/** Do we want get or set metadata */
uint32_t op;
__u32 op;
struct {
/** For future use, no flags defined so far */
uint64_t flags;
__u64 flags;
/** family specific tiling info */
uint64_t tiling_info;
uint32_t data_size_bytes;
uint32_t data[64];
__u64 tiling_info;
__u32 data_size_bytes;
__u32 data[64];
} data;
};
struct drm_amdgpu_gem_mmap_in {
/** the GEM object handle */
uint32_t handle;
uint32_t _pad;
__u32 handle;
__u32 _pad;
};
struct drm_amdgpu_gem_mmap_out {
/** mmap offset from the vma offset manager */
uint64_t addr_ptr;
__u64 addr_ptr;
};
union drm_amdgpu_gem_mmap {
@@ -263,18 +275,18 @@ union drm_amdgpu_gem_mmap {
struct drm_amdgpu_gem_wait_idle_in {
/** GEM object handle */
uint32_t handle;
__u32 handle;
/** For future use, no flags defined so far */
uint32_t flags;
__u32 flags;
/** Absolute timeout to wait */
uint64_t timeout;
__u64 timeout;
};
struct drm_amdgpu_gem_wait_idle_out {
/** BO status: 0 - BO is idle, 1 - BO is busy */
uint32_t status;
__u32 status;
/** Returned current memory domain */
uint32_t domain;
__u32 domain;
};
union drm_amdgpu_gem_wait_idle {
@@ -284,18 +296,18 @@ union drm_amdgpu_gem_wait_idle {
struct drm_amdgpu_wait_cs_in {
/** Command submission handle */
uint64_t handle;
__u64 handle;
/** Absolute timeout to wait */
uint64_t timeout;
uint32_t ip_type;
uint32_t ip_instance;
uint32_t ring;
uint32_t ctx_id;
__u64 timeout;
__u32 ip_type;
__u32 ip_instance;
__u32 ring;
__u32 ctx_id;
};
struct drm_amdgpu_wait_cs_out {
/** CS status: 0 - CS completed, 1 - CS still busy */
uint64_t status;
__u64 status;
};
union drm_amdgpu_wait_cs {
@@ -303,21 +315,49 @@ union drm_amdgpu_wait_cs {
struct drm_amdgpu_wait_cs_out out;
};
struct drm_amdgpu_fence {
__u32 ctx_id;
__u32 ip_type;
__u32 ip_instance;
__u32 ring;
__u64 seq_no;
};
struct drm_amdgpu_wait_fences_in {
/** This points to uint64_t * which points to fences */
__u64 fences;
__u32 fence_count;
__u32 wait_all;
__u64 timeout_ns;
};
struct drm_amdgpu_wait_fences_out {
__u32 status;
__u32 first_signaled;
};
union drm_amdgpu_wait_fences {
struct drm_amdgpu_wait_fences_in in;
struct drm_amdgpu_wait_fences_out out;
};
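A userspace sketch of the new ioctl (not from the tree; assumes fd is an open device node and the fence structs were filled in from earlier submissions):

struct drm_amdgpu_fence fences[2];	/* filled in elsewhere */
union drm_amdgpu_wait_fences args;
memset(&args, 0, sizeof(args));
args.in.fences = (__u64)(uintptr_t)fences;
args.in.fence_count = 2;
args.in.wait_all = 0;			/* wait for any one fence */
args.in.timeout_ns = 1000000000;	/* 1 second */
int r = drmCommandWriteRead(fd, DRM_AMDGPU_WAIT_FENCES, &args, sizeof(args));
/* on success, args.out.first_signaled names the fence that fired */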
#define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0
#define AMDGPU_GEM_OP_SET_PLACEMENT 1
/* Sets or returns a value associated with a buffer. */
struct drm_amdgpu_gem_op {
/** GEM object handle */
uint32_t handle;
__u32 handle;
/** AMDGPU_GEM_OP_* */
uint32_t op;
__u32 op;
/** Input or return value */
uint64_t value;
__u64 value;
};
#define AMDGPU_VA_OP_MAP 1
#define AMDGPU_VA_OP_UNMAP 2
#define AMDGPU_VA_OP_CLEAR 3
#define AMDGPU_VA_OP_REPLACE 4
/* Delay the page table update till the next CS */
#define AMDGPU_VM_DELAY_UPDATE (1 << 0)
@@ -329,21 +369,35 @@ struct drm_amdgpu_gem_op {
#define AMDGPU_VM_PAGE_WRITEABLE (1 << 2)
/* executable mapping, new for VI */
#define AMDGPU_VM_PAGE_EXECUTABLE (1 << 3)
/* partially resident texture */
#define AMDGPU_VM_PAGE_PRT (1 << 4)
/* MTYPE flags use bit 5 to 8 */
#define AMDGPU_VM_MTYPE_MASK (0xf << 5)
/* Default MTYPE. Pre-AI must use this. Recommended for newer ASICs. */
#define AMDGPU_VM_MTYPE_DEFAULT (0 << 5)
/* Use NC MTYPE instead of default MTYPE */
#define AMDGPU_VM_MTYPE_NC (1 << 5)
/* Use WC MTYPE instead of default MTYPE */
#define AMDGPU_VM_MTYPE_WC (2 << 5)
/* Use CC MTYPE instead of default MTYPE */
#define AMDGPU_VM_MTYPE_CC (3 << 5)
/* Use UC MTYPE instead of default MTYPE */
#define AMDGPU_VM_MTYPE_UC (4 << 5)
struct drm_amdgpu_gem_va {
/** GEM object handle */
uint32_t handle;
uint32_t _pad;
__u32 handle;
__u32 _pad;
/** AMDGPU_VA_OP_* */
uint32_t operation;
__u32 operation;
/** AMDGPU_VM_PAGE_* */
uint32_t flags;
__u32 flags;
/** va address to assign . Must be correctly aligned.*/
uint64_t va_address;
__u64 va_address;
/** Specify offset inside of BO to assign. Must be correctly aligned.*/
uint64_t offset_in_bo;
__u64 offset_in_bo;
/** Specify mapping size. Must be correctly aligned. */
uint64_t map_size;
__u64 map_size;
};
#define AMDGPU_HW_IP_GFX 0
@@ -351,7 +405,8 @@ struct drm_amdgpu_gem_va {
#define AMDGPU_HW_IP_DMA 2
#define AMDGPU_HW_IP_UVD 3
#define AMDGPU_HW_IP_VCE 4
#define AMDGPU_HW_IP_NUM 5
#define AMDGPU_HW_IP_UVD_ENC 5
#define AMDGPU_HW_IP_NUM 6
#define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1
@@ -360,24 +415,24 @@ struct drm_amdgpu_gem_va {
#define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03
struct drm_amdgpu_cs_chunk {
uint32_t chunk_id;
uint32_t length_dw;
uint64_t chunk_data;
__u32 chunk_id;
__u32 length_dw;
__u64 chunk_data;
};
struct drm_amdgpu_cs_in {
/** Rendering context id */
uint32_t ctx_id;
__u32 ctx_id;
/** Handle of resource list associated with CS */
uint32_t bo_list_handle;
uint32_t num_chunks;
uint32_t _pad;
/** this points to uint64_t * which point to cs chunks */
uint64_t chunks;
__u32 bo_list_handle;
__u32 num_chunks;
__u32 _pad;
/** this points to __u64 * which point to cs chunks */
__u64 chunks;
};
struct drm_amdgpu_cs_out {
uint64_t handle;
__u64 handle;
};
union drm_amdgpu_cs {
@@ -390,36 +445,39 @@ union drm_amdgpu_cs {
/* This IB should be submitted to CE */
#define AMDGPU_IB_FLAG_CE (1<<0)
/* CE Preamble */
/* Preamble flag, which means the IB could be dropped if no context switch */
#define AMDGPU_IB_FLAG_PREAMBLE (1<<1)
/* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */
#define AMDGPU_IB_FLAG_PREEMPT (1<<2)
struct drm_amdgpu_cs_chunk_ib {
uint32_t _pad;
__u32 _pad;
/** AMDGPU_IB_FLAG_* */
uint32_t flags;
__u32 flags;
/** Virtual address to begin IB execution */
uint64_t va_start;
__u64 va_start;
/** Size of submission */
uint32_t ib_bytes;
__u32 ib_bytes;
/** HW IP to submit to */
uint32_t ip_type;
__u32 ip_type;
/** HW IP index of the same type to submit to */
uint32_t ip_instance;
__u32 ip_instance;
/** Ring index to submit to */
uint32_t ring;
__u32 ring;
};
struct drm_amdgpu_cs_chunk_dep {
uint32_t ip_type;
uint32_t ip_instance;
uint32_t ring;
uint32_t ctx_id;
uint64_t handle;
__u32 ip_type;
__u32 ip_instance;
__u32 ring;
__u32 ctx_id;
__u64 handle;
};
struct drm_amdgpu_cs_chunk_fence {
uint32_t handle;
uint32_t offset;
__u32 handle;
__u32 offset;
};
struct drm_amdgpu_cs_chunk_data {
@@ -434,6 +492,7 @@ struct drm_amdgpu_cs_chunk_data {
*
*/
#define AMDGPU_IDS_FLAGS_FUSION 0x1
#define AMDGPU_IDS_FLAGS_PREEMPTION 0x2
/* indicate if acceleration can be working */
#define AMDGPU_INFO_ACCEL_WORKING 0x00
@@ -467,6 +526,10 @@ struct drm_amdgpu_cs_chunk_data {
#define AMDGPU_INFO_FW_SMC 0x0a
/* Subquery id: Query SDMA firmware version */
#define AMDGPU_INFO_FW_SDMA 0x0b
/* Subquery id: Query PSP SOS firmware version */
#define AMDGPU_INFO_FW_SOS 0x0c
/* Subquery id: Query PSP ASD firmware version */
#define AMDGPU_INFO_FW_ASD 0x0d
/* number of bytes moved for TTM migration */
#define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f
/* the used VRAM size */
@@ -483,6 +546,36 @@ struct drm_amdgpu_cs_chunk_data {
#define AMDGPU_INFO_DEV_INFO 0x16
/* visible vram usage */
#define AMDGPU_INFO_VIS_VRAM_USAGE 0x17
/* number of TTM buffer evictions */
#define AMDGPU_INFO_NUM_EVICTIONS 0x18
/* Query memory about VRAM and GTT domains */
#define AMDGPU_INFO_MEMORY 0x19
/* Query vce clock table */
#define AMDGPU_INFO_VCE_CLOCK_TABLE 0x1A
/* Query vbios related information */
#define AMDGPU_INFO_VBIOS 0x1B
/* Subquery id: Query vbios size */
#define AMDGPU_INFO_VBIOS_SIZE 0x1
/* Subquery id: Query vbios image */
#define AMDGPU_INFO_VBIOS_IMAGE 0x2
/* Query UVD handles */
#define AMDGPU_INFO_NUM_HANDLES 0x1C
/* Query sensor related information */
#define AMDGPU_INFO_SENSOR 0x1D
/* Subquery id: Query GPU shader clock */
#define AMDGPU_INFO_SENSOR_GFX_SCLK 0x1
/* Subquery id: Query GPU memory clock */
#define AMDGPU_INFO_SENSOR_GFX_MCLK 0x2
/* Subquery id: Query GPU temperature */
#define AMDGPU_INFO_SENSOR_GPU_TEMP 0x3
/* Subquery id: Query GPU load */
#define AMDGPU_INFO_SENSOR_GPU_LOAD 0x4
/* Subquery id: Query average GPU power */
#define AMDGPU_INFO_SENSOR_GPU_AVG_POWER 0x5
/* Subquery id: Query northbridge voltage */
#define AMDGPU_INFO_SENSOR_VDDNB 0x6
/* Subquery id: Query graphics voltage */
#define AMDGPU_INFO_SENSOR_VDDGFX 0x7
#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
#define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff
@@ -491,86 +584,123 @@ struct drm_amdgpu_cs_chunk_data {
struct drm_amdgpu_query_fw {
/** AMDGPU_INFO_FW_* */
uint32_t fw_type;
__u32 fw_type;
/**
* Index of the IP if there are more IPs of
* the same type.
*/
uint32_t ip_instance;
__u32 ip_instance;
/**
* Index of the engine. Whether this is used depends
* on the firmware type. (e.g. MEC, SDMA)
*/
uint32_t index;
uint32_t _pad;
__u32 index;
__u32 _pad;
};
/* Input structure for the INFO ioctl */
struct drm_amdgpu_info {
/* Where the return value will be stored */
uint64_t return_pointer;
__u64 return_pointer;
/* The size of the return value. Just like "size" in "snprintf",
* it limits how many bytes the kernel can write. */
uint32_t return_size;
__u32 return_size;
/* The query request id. */
uint32_t query;
__u32 query;
union {
struct {
uint32_t id;
uint32_t _pad;
__u32 id;
__u32 _pad;
} mode_crtc;
struct {
/** AMDGPU_HW_IP_* */
uint32_t type;
__u32 type;
/**
* Index of the IP if there are more IPs of the same
* type. Ignored by AMDGPU_INFO_HW_IP_COUNT.
*/
uint32_t ip_instance;
__u32 ip_instance;
} query_hw_ip;
struct {
uint32_t dword_offset;
__u32 dword_offset;
/** number of registers to read */
uint32_t count;
uint32_t instance;
__u32 count;
__u32 instance;
/** For future use, no flags defined so far */
uint32_t flags;
__u32 flags;
} read_mmr_reg;
struct drm_amdgpu_query_fw query_fw;
struct {
__u32 type;
__u32 offset;
} vbios_info;
struct {
__u32 type;
} sensor_info;
};
};
struct drm_amdgpu_info_gds {
/** GDS GFX partition size */
uint32_t gds_gfx_partition_size;
__u32 gds_gfx_partition_size;
/** GDS compute partition size */
uint32_t compute_partition_size;
__u32 compute_partition_size;
/** total GDS memory size */
uint32_t gds_total_size;
__u32 gds_total_size;
/** GWS size per GFX partition */
uint32_t gws_per_gfx_partition;
__u32 gws_per_gfx_partition;
/** GSW size per compute partition */
uint32_t gws_per_compute_partition;
__u32 gws_per_compute_partition;
/** OA size per GFX partition */
uint32_t oa_per_gfx_partition;
__u32 oa_per_gfx_partition;
/** OA size per compute partition */
uint32_t oa_per_compute_partition;
uint32_t _pad;
__u32 oa_per_compute_partition;
__u32 _pad;
};
struct drm_amdgpu_info_vram_gtt {
uint64_t vram_size;
uint64_t vram_cpu_accessible_size;
uint64_t gtt_size;
__u64 vram_size;
__u64 vram_cpu_accessible_size;
__u64 gtt_size;
};
struct drm_amdgpu_heap_info {
/** max. physical memory */
__u64 total_heap_size;
/** Theoretical max. available memory in the given heap */
__u64 usable_heap_size;
/**
* Number of bytes allocated in the heap. This includes all processes
* and private allocations in the kernel. It changes when new buffers
* are allocated, freed, and moved. It cannot be larger than
* heap_size.
*/
__u64 heap_usage;
/**
* Theoretical possible max. size of buffer which
* could be allocated in the given heap
*/
__u64 max_allocation;
};
struct drm_amdgpu_memory_info {
struct drm_amdgpu_heap_info vram;
struct drm_amdgpu_heap_info cpu_accessible_vram;
struct drm_amdgpu_heap_info gtt;
};
struct drm_amdgpu_info_firmware {
uint32_t ver;
uint32_t feature;
__u32 ver;
__u32 feature;
};
#define AMDGPU_VRAM_TYPE_UNKNOWN 0
@@ -584,71 +714,108 @@ struct drm_amdgpu_info_firmware {
struct drm_amdgpu_info_device {
/** PCI Device ID */
uint32_t device_id;
__u32 device_id;
/** Internal chip revision: A0, A1, etc.) */
uint32_t chip_rev;
uint32_t external_rev;
__u32 chip_rev;
__u32 external_rev;
/** Revision id in PCI Config space */
uint32_t pci_rev;
uint32_t family;
uint32_t num_shader_engines;
uint32_t num_shader_arrays_per_engine;
__u32 pci_rev;
__u32 family;
__u32 num_shader_engines;
__u32 num_shader_arrays_per_engine;
/* in KHz */
uint32_t gpu_counter_freq;
uint64_t max_engine_clock;
uint64_t max_memory_clock;
__u32 gpu_counter_freq;
__u64 max_engine_clock;
__u64 max_memory_clock;
/* cu information */
uint32_t cu_active_number;
uint32_t cu_ao_mask;
uint32_t cu_bitmap[4][4];
__u32 cu_active_number;
__u32 cu_ao_mask;
__u32 cu_bitmap[4][4];
/** Render backend pipe mask. One render backend is CB+DB. */
uint32_t enabled_rb_pipes_mask;
uint32_t num_rb_pipes;
uint32_t num_hw_gfx_contexts;
uint32_t _pad;
uint64_t ids_flags;
__u32 enabled_rb_pipes_mask;
__u32 num_rb_pipes;
__u32 num_hw_gfx_contexts;
__u32 _pad;
__u64 ids_flags;
/** Starting virtual address for UMDs. */
uint64_t virtual_address_offset;
__u64 virtual_address_offset;
/** The maximum virtual address */
uint64_t virtual_address_max;
__u64 virtual_address_max;
/** Required alignment of virtual addresses. */
uint32_t virtual_address_alignment;
__u32 virtual_address_alignment;
/** Page table entry - fragment size */
uint32_t pte_fragment_size;
uint32_t gart_page_size;
__u32 pte_fragment_size;
__u32 gart_page_size;
/** constant engine ram size*/
uint32_t ce_ram_size;
__u32 ce_ram_size;
/** video memory type info*/
uint32_t vram_type;
__u32 vram_type;
/** video memory bit width*/
uint32_t vram_bit_width;
__u32 vram_bit_width;
/* vce harvesting instance */
uint32_t vce_harvest_config;
__u32 vce_harvest_config;
/* gfx double offchip LDS buffers */
__u32 gc_double_offchip_lds_buf;
/* NGG Primitive Buffer */
__u64 prim_buf_gpu_addr;
/* NGG Position Buffer */
__u64 pos_buf_gpu_addr;
/* NGG Control Sideband */
__u64 cntl_sb_buf_gpu_addr;
/* NGG Parameter Cache */
__u64 param_buf_gpu_addr;
};
struct drm_amdgpu_info_hw_ip {
/** Version of h/w IP */
uint32_t hw_ip_version_major;
uint32_t hw_ip_version_minor;
__u32 hw_ip_version_major;
__u32 hw_ip_version_minor;
/** Capabilities */
uint64_t capabilities_flags;
__u64 capabilities_flags;
/** command buffer address start alignment*/
uint32_t ib_start_alignment;
__u32 ib_start_alignment;
/** command buffer size alignment*/
uint32_t ib_size_alignment;
__u32 ib_size_alignment;
/** Bitmask of available rings. Bit 0 means ring 0, etc. */
uint32_t available_rings;
uint32_t _pad;
__u32 available_rings;
__u32 _pad;
};
struct drm_amdgpu_info_num_handles {
/** Max handles as supported by firmware for UVD */
__u32 uvd_max_handles;
/** Handles currently in use for UVD */
__u32 uvd_used_handles;
};
#define AMDGPU_VCE_CLOCK_TABLE_ENTRIES 6
struct drm_amdgpu_info_vce_clock_table_entry {
/** System clock */
__u32 sclk;
/** Memory clock */
__u32 mclk;
/** VCE clock */
__u32 eclk;
__u32 pad;
};
struct drm_amdgpu_info_vce_clock_table {
struct drm_amdgpu_info_vce_clock_table_entry entries[AMDGPU_VCE_CLOCK_TABLE_ENTRIES];
__u32 num_valid_entries;
__u32 pad;
};
/*
* Supported GPU families
*/
#define AMDGPU_FAMILY_UNKNOWN 0
#define AMDGPU_FAMILY_SI 110 /* Hainan, Oland, Verde, Pitcairn, Tahiti */
#define AMDGPU_FAMILY_CI 120 /* Bonaire, Hawaii */
#define AMDGPU_FAMILY_KV 125 /* Kaveri, Kabini, Mullins */
#define AMDGPU_FAMILY_VI 130 /* Iceland, Tonga */
#define AMDGPU_FAMILY_CZ 135 /* Carrizo, Stoney */
#define AMDGPU_FAMILY_AI 141 /* Vega10 */
#if defined(__cplusplus)
}

View File

@@ -47,7 +47,15 @@ extern "C" {
#define DRM_MODE_TYPE_DRIVER (1<<6)
/* Video mode flags */
/* bit compatible with the xorg definitions. */
/* bit compatible with the xrandr RR_ definitions (bits 0-13)
*
* ABI warning: Existing userspace really expects
* the mode flags to match the xrandr definitions. Any
* changes that don't match the xrandr definitions will
* likely need a new client cap or some other mechanism
* to avoid breaking existing userspace. This includes
* allocating new flags in the previously unused bits!
*/
#define DRM_MODE_FLAG_PHSYNC (1<<0)
#define DRM_MODE_FLAG_NHSYNC (1<<1)
#define DRM_MODE_FLAG_PVSYNC (1<<2)
@@ -107,6 +115,10 @@ extern "C" {
#define DRM_MODE_DIRTY_ON 1
#define DRM_MODE_DIRTY_ANNOTATE 2
/* Link Status options */
#define DRM_MODE_LINK_STATUS_GOOD 0
#define DRM_MODE_LINK_STATUS_BAD 1
struct drm_mode_modeinfo {
__u32 clock;
__u16 hdisplay;
@@ -220,14 +232,16 @@ struct drm_mode_get_encoder {
/* This is for connectors with multiple signal types. */
/* Try to match DRM_MODE_CONNECTOR_X as closely as possible. */
#define DRM_MODE_SUBCONNECTOR_Automatic 0
#define DRM_MODE_SUBCONNECTOR_Unknown 0
#define DRM_MODE_SUBCONNECTOR_DVID 3
#define DRM_MODE_SUBCONNECTOR_DVIA 4
#define DRM_MODE_SUBCONNECTOR_Composite 5
#define DRM_MODE_SUBCONNECTOR_SVIDEO 6
#define DRM_MODE_SUBCONNECTOR_Component 8
#define DRM_MODE_SUBCONNECTOR_SCART 9
enum drm_mode_subconnector {
DRM_MODE_SUBCONNECTOR_Automatic = 0,
DRM_MODE_SUBCONNECTOR_Unknown = 0,
DRM_MODE_SUBCONNECTOR_DVID = 3,
DRM_MODE_SUBCONNECTOR_DVIA = 4,
DRM_MODE_SUBCONNECTOR_Composite = 5,
DRM_MODE_SUBCONNECTOR_SVIDEO = 6,
DRM_MODE_SUBCONNECTOR_Component = 8,
DRM_MODE_SUBCONNECTOR_SCART = 9,
};
#define DRM_MODE_CONNECTOR_Unknown 0
#define DRM_MODE_CONNECTOR_VGA 1
@@ -392,17 +406,20 @@ struct drm_mode_fb_cmd2 {
* offsets[1]. Note that offsets[0] will generally
* be 0 (but this is not required).
*
* To accommodate tiled, compressed, etc formats, a per-plane
* To accommodate tiled, compressed, etc formats, a
* modifier can be specified. The default value of zero
* indicates "native" format as specified by the fourcc.
* Vendor specific modifier token. This allows, for example,
* different tiling/swizzling pattern on different planes.
* See discussion above of DRM_FORMAT_MOD_xxx.
* Vendor specific modifier token. Note that even though
* it looks like we have a modifier per-plane, we in fact
* do not. The modifier for each plane must be identical.
* Thus all combinations of different data layouts for
* multi plane formats must be enumerated as separate
* modifiers.
*/
__u32 handles[4];
__u32 pitches[4]; /* pitch for each plane */
__u32 offsets[4]; /* offset of each plane */
__u64 modifier[4]; /* ie, tiling, compressed (per plane) */
__u64 modifier[4]; /* ie, tiling, compress */
};
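A sketch of what the reworded comment requires (some_modifier is a placeholder; not from the tree):

struct drm_mode_fb_cmd2 f;
memset(&f, 0, sizeof(f));
/* Illustrative only: every plane must carry the same modifier. */
f.modifier[0] = f.modifier[1] = f.modifier[2] = f.modifier[3] = some_modifier;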
#define DRM_MODE_FB_DIRTY_ANNOTATE_COPY 0x01

View File

@@ -309,8 +309,8 @@ static int modeset_find_crtc(int fd, drmModeRes *res, drmModeConnector *conn)
<refsect1>
<title>Reporting Bugs</title>
<para>Bugs in this manual should be reported to
http://bugs.freedesktop.org under the "Mesa" product, with "Other" or
"libdrm" as the component.</para>
https://bugs.freedesktop.org/enter_bug.cgi?product=DRI&amp;component=libdrm
under the "DRI" product, component "libdrm"</para>
</refsect1>
<refsect1>

View File

@@ -410,8 +410,8 @@ memset(map, 0, creq.size);
<refsect1>
<title>Reporting Bugs</title>
<para>Bugs in this manual should be reported to
http://bugs.freedesktop.org under the "Mesa" product, with "Other" or
"libdrm" as the component.</para>
https://bugs.freedesktop.org/enter_bug.cgi?product=DRI&amp;component=libdrm
under the "DRI" product, component "libdrm"</para>
</refsect1>
<refsect1>

View File

@@ -50,7 +50,7 @@
<para>In earlier days, the kernel framework was solely used to provide raw
hardware access to priviledged user-space processes which implement
all the hardware abstraction layers. But more and more tasks where
all the hardware abstraction layers. But more and more tasks were
moved into the kernel. All these interfaces are based on
<citerefentry><refentrytitle>ioctl</refentrytitle><manvolnum>2</manvolnum></citerefentry>
commands on the DRM character device. The <emphasis>libdrm</emphasis>
@@ -119,8 +119,8 @@
<refsect1>
<title>Reporting Bugs</title>
<para>Bugs in this manual should be reported to
http://bugs.freedesktop.org under the "Mesa" product, with "Other" or
"libdrm" as the component.</para>
https://bugs.freedesktop.org/enter_bug.cgi?product=DRI&amp;component=libdrm
under the "DRI" product, component "libdrm"</para>
</refsect1>
<refsect1>

View File

@@ -61,8 +61,8 @@
<refsect1>
<title>Reporting Bugs</title>
<para>Bugs in this function should be reported to
http://bugs.freedesktop.org under the "Mesa" product, with "Other" or
"libdrm" as the component.</para>
https://bugs.freedesktop.org/enter_bug.cgi?product=DRI&amp;component=libdrm
under the "DRI" product, component "libdrm"</para>
</refsect1>
<refsect1>

View File

@@ -86,8 +86,8 @@ typedef struct _drmEventContext {
<refsect1>
<title>Reporting Bugs</title>
<para>Bugs in this function should be reported to
http://bugs.freedesktop.org under the "Mesa" product, with "Other" or
"libdrm" as the component.</para>
https://bugs.freedesktop.org/enter_bug.cgi?product=DRI&amp;component=libdrm
under the "DRI" product, component "libdrm"</para>
</refsect1>
<refsect1>

View File

@@ -116,8 +116,8 @@ typedef struct _drmModeRes {
<refsect1>
<title>Reporting Bugs</title>
<para>Bugs in this function should be reported to
http://bugs.freedesktop.org under the "Mesa" product, with "Other" or
"libdrm" as the component.</para>
https://bugs.freedesktop.org/enter_bug.cgi?product=DRI&amp;component=libdrm
under the "DRI" product, component "libdrm"</para>
</refsect1>
<refsect1>

View File

@@ -1,11 +1,14 @@
#!/bin/bash
# The following symbols (past the first five) are taken from the public headers.
# A list of the latter should be available Makefile.sources/LIBDRM_FREEDRENO_H_FILES
# The following symbols (past the first nine) are taken from tegra.h.
FUNCS=$(nm -D --format=bsd --defined-only ${1-.libs/libdrm_tegra.so} | awk '{print $3}'| while read func; do
( grep -q "^$func$" || echo $func ) <<EOF
__bss_end__
__bss_start__
__bss_start
__end__
_bss_end__
_edata
_end
_fini

View File

@@ -803,12 +803,16 @@ static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
uint32_t *pm4;
struct amdgpu_cs_ib_info *ib_info;
struct amdgpu_cs_request *ibs_request;
struct amdgpu_gpu_info gpu_info = {0};
uint64_t bo_mc;
volatile uint32_t *bo_cpu;
int i, j, r, loop;
uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
amdgpu_va_handle va_handle;
r = amdgpu_query_gpu_info(device_handle, &gpu_info);
CU_ASSERT_EQUAL(r, 0);
pm4 = calloc(pm4_dw, sizeof(*pm4));
CU_ASSERT_NOT_EQUAL(pm4, NULL);
@@ -848,7 +852,10 @@ static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
pm4[i++] = 0xffffffff & bo_mc;
pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
pm4[i++] = sdma_write_length;
if (gpu_info.family_id >= AMDGPU_FAMILY_AI)
pm4[i++] = sdma_write_length - 1;
else
pm4[i++] = sdma_write_length;
while(j++ < sdma_write_length)
pm4[i++] = 0xdeadbeaf;
} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
@@ -904,12 +911,16 @@ static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
uint32_t *pm4;
struct amdgpu_cs_ib_info *ib_info;
struct amdgpu_cs_request *ibs_request;
struct amdgpu_gpu_info gpu_info = {0};
uint64_t bo_mc;
volatile uint32_t *bo_cpu;
int i, j, r, loop;
uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
amdgpu_va_handle va_handle;
r = amdgpu_query_gpu_info(device_handle, &gpu_info);
CU_ASSERT_EQUAL(r, 0);
pm4 = calloc(pm4_dw, sizeof(*pm4));
CU_ASSERT_NOT_EQUAL(pm4, NULL);
@@ -949,7 +960,10 @@ static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
pm4[i++] = 0xffffffff & bo_mc;
pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
pm4[i++] = 0xdeadbeaf;
pm4[i++] = sdma_write_length;
if (gpu_info.family_id >= AMDGPU_FAMILY_AI)
pm4[i++] = sdma_write_length - 1;
else
pm4[i++] = sdma_write_length;
} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
(ip_type == AMDGPU_HW_IP_COMPUTE)) {
pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
@@ -1007,12 +1021,16 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
uint32_t *pm4;
struct amdgpu_cs_ib_info *ib_info;
struct amdgpu_cs_request *ibs_request;
struct amdgpu_gpu_info gpu_info = {0};
uint64_t bo1_mc, bo2_mc;
volatile unsigned char *bo1_cpu, *bo2_cpu;
int i, j, r, loop1, loop2;
uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
amdgpu_va_handle bo1_va_handle, bo2_va_handle;
r = amdgpu_query_gpu_info(device_handle, &gpu_info);
CU_ASSERT_EQUAL(r, 0);
pm4 = calloc(pm4_dw, sizeof(*pm4));
CU_ASSERT_NOT_EQUAL(pm4, NULL);
@@ -1064,7 +1082,10 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
i = j = 0;
if (ip_type == AMDGPU_HW_IP_DMA) {
pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
pm4[i++] = sdma_write_length;
if (gpu_info.family_id >= AMDGPU_FAMILY_AI)
pm4[i++] = sdma_write_length - 1;
else
pm4[i++] = sdma_write_length;
pm4[i++] = 0;
pm4[i++] = 0xffffffff & bo1_mc;
pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
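The same family check now appears in all three SDMA helpers; a hypothetical helper (not in the tree) capturing the shared branch:

/* On AMDGPU_FAMILY_AI and newer the SDMA COUNT field holds length - 1. */
static uint32_t sdma_count_dw(uint32_t family_id, uint32_t len)
{
	return (family_id >= AMDGPU_FAMILY_AI) ? len - 1 : len;
}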

View File

@@ -175,11 +175,11 @@ static int submit(unsigned ndw, unsigned ip)
static void uvd_cmd(uint64_t addr, unsigned cmd, int *idx)
{
ib_cpu[(*idx)++] = 0x3BC4;
ib_cpu[(*idx)++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC4 : 0x81C4;
ib_cpu[(*idx)++] = addr;
ib_cpu[(*idx)++] = 0x3BC5;
ib_cpu[(*idx)++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC5 : 0x81C5;
ib_cpu[(*idx)++] = addr >> 32;
ib_cpu[(*idx)++] = 0x3BC3;
ib_cpu[(*idx)++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC3 : 0x81C3;
ib_cpu[(*idx)++] = cmd << 1;
}
@@ -211,10 +211,13 @@ static void amdgpu_cs_uvd_create(void)
CU_ASSERT_EQUAL(r, 0);
memcpy(msg, uvd_create_msg, sizeof(uvd_create_msg));
if (family_id >= AMDGPU_FAMILY_VI) {
((uint8_t*)msg)[0x10] = 7;
/* chip polaris 10/11 */
if (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A) {
/* chip beyond polaris 10/11 */
if ((family_id == AMDGPU_FAMILY_AI) ||
(chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A ||
chip_id == chip_rev+0x64)) {
/* dpb size */
((uint8_t*)msg)[0x28] = 0x00;
((uint8_t*)msg)[0x29] = 0x94;
@@ -287,13 +290,16 @@ static void amdgpu_cs_uvd_decode(void)
CU_ASSERT_EQUAL(r, 0);
memcpy(ptr, uvd_decode_msg, sizeof(uvd_create_msg));
if (family_id >= AMDGPU_FAMILY_VI) {
ptr[0x10] = 7;
ptr[0x98] = 0x00;
ptr[0x99] = 0x02;
/* chip polaris10/11 */
if (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A) {
/*dpb size */
/* chip beyond polaris10/11 */
if ((family_id == AMDGPU_FAMILY_AI) ||
(chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A ||
chip_id == chip_rev+0x64)) {
/* dpb size */
ptr[0x24] = 0x00;
ptr[0x25] = 0x94;
ptr[0x26] = 0x6B;
@@ -335,9 +341,12 @@ static void amdgpu_cs_uvd_decode(void)
bs_addr = fb_addr + 4*1024;
dpb_addr = ALIGN(bs_addr + sizeof(uvd_bitstream), 4*1024);
if ((family_id >= AMDGPU_FAMILY_VI) &&
(chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) {
ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024);
if (family_id >= AMDGPU_FAMILY_VI) {
if ((family_id == AMDGPU_FAMILY_AI) ||
(chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A ||
chip_id == chip_rev+0x64)) {
ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024);
}
}
dt_addr = ALIGN(dpb_addr + dpb_size, 4*1024);
@@ -348,12 +357,16 @@ static void amdgpu_cs_uvd_decode(void)
uvd_cmd(dt_addr, 0x2, &i);
uvd_cmd(fb_addr, 0x3, &i);
uvd_cmd(bs_addr, 0x100, &i);
if (family_id >= AMDGPU_FAMILY_VI) {
uvd_cmd(it_addr, 0x204, &i);
if (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)
if ((family_id == AMDGPU_FAMILY_AI) ||
(chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A ||
chip_id == chip_rev+0x64))
uvd_cmd(ctx_addr, 0x206, &i);
}
ib_cpu[i++] = 0x3BC6;
}
ib_cpu[i++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC6 : 0x81C6;
ib_cpu[i++] = 0x1;
for (; i % 16; ++i)
ib_cpu[i] = 0x80000000;

View File

@@ -234,6 +234,7 @@ static void free_resource(struct amdgpu_vce_bo *vce_bo)
static void amdgpu_cs_vce_create(void)
{
unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
int len, r;
enc.width = vce_create[6];
@@ -250,6 +251,8 @@ static void amdgpu_cs_vce_create(void)
memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo));
len += sizeof(vce_taskinfo) / 4;
memcpy((ib_cpu + len), vce_create, sizeof(vce_create));
ib_cpu[len + 8] = ALIGN(enc.width, align);
ib_cpu[len + 9] = ALIGN(enc.width, align);
len += sizeof(vce_create) / 4;
memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
ib_cpu[len + 2] = enc.fb[0].addr >> 32;
@@ -291,10 +294,12 @@ static void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc)
{
uint64_t luma_offset, chroma_offset;
int len = 0, r;
unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16);
int len = 0, i, r;
luma_offset = enc->vbuf.addr;
chroma_offset = luma_offset + enc->width * enc->height;
chroma_offset = luma_offset + luma_size;
memcpy((ib_cpu + len), vce_session, sizeof(vce_session));
len += sizeof(vce_session) / 4;
@@ -309,6 +314,10 @@ static void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc)
ib_cpu[len + 3] = enc->cpb.addr;
len += sizeof(vce_context_buffer) / 4;
memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer));
for (i = 0; i < 8; ++i)
ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2);
for (i = 0; i < 8; ++i)
ib_cpu[len + 10 + i] = luma_size * 1.5;
len += sizeof(vce_aux_buffer) / 4;
memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
ib_cpu[len + 2] = enc->fb[0].addr >> 32;
@@ -319,8 +328,10 @@ static void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc)
ib_cpu[len + 10] = luma_offset;
ib_cpu[len + 11] = chroma_offset >> 32;
ib_cpu[len + 12] = chroma_offset;
ib_cpu[len + 73] = 0x7800;
ib_cpu[len + 74] = 0x7800 + 0x5000;
ib_cpu[len + 14] = ALIGN(enc->width, align);
ib_cpu[len + 15] = ALIGN(enc->width, align);
ib_cpu[len + 73] = luma_size * 1.5;
ib_cpu[len + 74] = luma_size * 2.5;
len += sizeof(vce_encode) / 4;
enc->ib_len = len;
if (!enc->two_instance) {
@@ -332,11 +343,13 @@ static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc)
static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc)
{
uint64_t luma_offset, chroma_offset;
int len, r;
int len, i, r;
unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16);
len = (enc->two_instance) ? enc->ib_len : 0;
luma_offset = enc->vbuf.addr;
chroma_offset = luma_offset + enc->width * enc->height;
chroma_offset = luma_offset + luma_size;
if (!enc->two_instance) {
memcpy((ib_cpu + len), vce_session, sizeof(vce_session));
@@ -353,6 +366,10 @@ static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc)
ib_cpu[len + 3] = enc->cpb.addr;
len += sizeof(vce_context_buffer) / 4;
memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer));
for (i = 0; i < 8; ++i)
ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2);
for (i = 0; i < 8; ++i)
ib_cpu[len + 10 + i] = luma_size * 1.5;
len += sizeof(vce_aux_buffer) / 4;
memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
ib_cpu[len + 2] = enc->fb[1].addr >> 32;
@@ -364,15 +381,17 @@ static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc)
ib_cpu[len + 10] = luma_offset;
ib_cpu[len + 11] = chroma_offset >> 32;
ib_cpu[len + 12] = chroma_offset;
ib_cpu[len + 14] = ALIGN(enc->width, align);
ib_cpu[len + 15] = ALIGN(enc->width, align);
ib_cpu[len + 18] = 0;
ib_cpu[len + 19] = 0;
ib_cpu[len + 56] = 3;
ib_cpu[len + 57] = 0;
ib_cpu[len + 58] = 0;
ib_cpu[len + 59] = 0x7800;
ib_cpu[len + 60] = 0x7800 + 0x5000;
ib_cpu[len + 59] = luma_size * 1.5;
ib_cpu[len + 60] = luma_size * 2.5;
ib_cpu[len + 73] = 0;
ib_cpu[len + 74] = 0x5000;
ib_cpu[len + 74] = luma_size;
ib_cpu[len + 81] = 1;
ib_cpu[len + 82] = 1;
len += sizeof(vce_encode) / 4;
@@ -408,9 +427,10 @@ static void check_result(struct amdgpu_vce_encode *enc)
static void amdgpu_cs_vce_encode(void)
{
uint32_t vbuf_size, bs_size = 0x154000, cpb_size;
int r;
unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
int i, r;
vbuf_size = enc.width * enc.height * 1.5;
vbuf_size = ALIGN(enc.width, align) * ALIGN(enc.height, 16) * 1.5;
cpb_size = vbuf_size * 10;
num_resources = 0;
alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT);
@@ -429,7 +449,17 @@ static void amdgpu_cs_vce_encode(void)
r = amdgpu_bo_cpu_map(enc.vbuf.handle, (void **)&enc.vbuf.ptr);
CU_ASSERT_EQUAL(r, 0);
memcpy(enc.vbuf.ptr, frame, sizeof(frame));
memset(enc.vbuf.ptr, 0, vbuf_size);
for (i = 0; i < enc.height; ++i) {
memcpy(enc.vbuf.ptr, (frame + i * enc.width), enc.width);
enc.vbuf.ptr += ALIGN(enc.width, align);
}
for (i = 0; i < enc.height / 2; ++i) {
memcpy(enc.vbuf.ptr, ((frame + enc.height * enc.width) + i * enc.width), enc.width);
enc.vbuf.ptr += ALIGN(enc.width, align);
}
r = amdgpu_bo_cpu_unmap(enc.vbuf.handle);
CU_ASSERT_EQUAL(r, 0);

View File

@@ -28,6 +28,7 @@ etnaviv_2d_test_SOURCES = \
write_bmp.h
etnaviv_cmd_stream_test_LDADD = \
$(top_builddir)/libdrm.la \
$(top_builddir)/etnaviv/libdrm_etnaviv.la
etnaviv_cmd_stream_test_SOURCES = \

View File

@@ -59,7 +59,6 @@ static void connector_find_mode(int fd, struct connector *c,
if (!connector) {
fprintf(stderr, "could not get connector %i: %s\n",
resources->connectors[i], strerror(errno));
drmModeFreeConnector(connector);
continue;
}
@@ -98,7 +97,6 @@ static void connector_find_mode(int fd, struct connector *c,
if (!c->encoder) {
fprintf(stderr, "could not get encoder %i: %s\n",
resources->encoders[i], strerror(errno));
drmModeFreeEncoder(c->encoder);
continue;
}
@@ -264,7 +262,8 @@ static int g2d_copy_test(struct exynos_device *dev, struct exynos_bo *src,
userptr = (unsigned long)malloc(size);
if (!userptr) {
fprintf(stderr, "failed to allocate userptr.\n");
return -EFAULT;
ret = -EFAULT;
goto fail;
}
src_img.user_ptr[0].userptr = userptr;
@@ -469,7 +468,8 @@ static int g2d_copy_with_scale_test(struct exynos_device *dev,
userptr = (unsigned long)malloc(size);
if (!userptr) {
fprintf(stderr, "failed to allocate userptr.\n");
return -EFAULT;
ret = -EFAULT;
goto fail;
}
src_img.user_ptr[0].userptr = userptr;
@@ -520,9 +520,10 @@ err_free_userptr:
fail:
g2d_fini(ctx);
return 0;
return ret;
}
#if EXYNOS_G2D_USERPTR_TEST
static int g2d_blend_test(struct exynos_device *dev,
struct exynos_bo *src,
struct exynos_bo *dst,
@@ -557,7 +558,8 @@ static int g2d_blend_test(struct exynos_device *dev,
userptr = (unsigned long)malloc(size);
if (!userptr) {
fprintf(stderr, "failed to allocate userptr.\n");
return -EFAULT;
ret = -EFAULT;
goto fail;
}
src_img.user_ptr[0].userptr = userptr;
@@ -619,8 +621,9 @@ err_free_userptr:
fail:
g2d_fini(ctx);
return 0;
return ret;
}
#endif
static int g2d_checkerboard_test(struct exynos_device *dev,
struct exynos_bo *src,
@@ -645,8 +648,8 @@ static int g2d_checkerboard_test(struct exynos_device *dev,
dst_y = 0;
checkerboard = create_checkerboard_pattern(screen_width / 32, screen_height / 32, 32);
if (checkerboard == NULL) {
ret = -1;
if (!checkerboard) {
ret = -EFAULT;
goto fail;
}
@@ -755,8 +758,8 @@ int main(int argc, char **argv)
dev = exynos_device_create(fd);
if (!dev) {
drmClose(dev->fd);
return -EFAULT;
ret = -EFAULT;
goto err_drm_close;
}
resources = drmModeGetResources(dev->fd);
@@ -764,7 +767,7 @@ int main(int argc, char **argv)
fprintf(stderr, "drmModeGetResources failed: %s\n",
strerror(errno));
ret = -EFAULT;
goto err_drm_close;
goto err_dev_destory;
}
connector_find_mode(dev->fd, &con, resources);
@@ -773,7 +776,7 @@ int main(int argc, char **argv)
if (!con.mode) {
fprintf(stderr, "failed to find usable connector\n");
ret = -EFAULT;
goto err_drm_close;
goto err_dev_destory;
}
screen_width = con.mode->hdisplay;
@@ -782,7 +785,7 @@ int main(int argc, char **argv)
if (screen_width == 0 || screen_height == 0) {
fprintf(stderr, "failed to find sane resolution on connector\n");
ret = -EFAULT;
goto err_drm_close;
goto err_dev_destory;
}
printf("screen width = %d, screen height = %d\n", screen_width,
@@ -791,7 +794,7 @@ int main(int argc, char **argv)
bo = exynos_create_buffer(dev, screen_width * screen_height * 4, 0);
if (!bo) {
ret = -EFAULT;
goto err_drm_close;
goto err_dev_destory;
}
handles[0] = bo->handle;
@@ -864,7 +867,7 @@ int main(int argc, char **argv)
*
* Disable the test for now, until the kernel code has been sanitized.
*/
#if 0
#if EXYNOS_G2D_USERPTR_TEST
ret = g2d_blend_test(dev, src, bo, G2D_IMGBUF_USERPTR);
if (ret < 0)
fprintf(stderr, "failed to test blend operation.\n");
@@ -882,9 +885,11 @@ err_rm_fb:
err_destroy_buffer:
exynos_destroy_buffer(bo);
err_drm_close:
drmClose(dev->fd);
err_dev_destory:
exynos_device_destroy(dev);
return 0;
err_drm_close:
drmClose(fd);
return ret;
}