diff --git a/lib/libdrm/amdgpu/amdgpu-symbol-check b/lib/libdrm/amdgpu/amdgpu-symbol-check index 87f4fd2cc..4d1ae65cb 100755 --- a/lib/libdrm/amdgpu/amdgpu-symbol-check +++ b/lib/libdrm/amdgpu/amdgpu-symbol-check @@ -22,6 +22,7 @@ amdgpu_bo_list_update amdgpu_bo_query_info amdgpu_bo_set_metadata amdgpu_bo_va_op +amdgpu_bo_va_op_raw amdgpu_bo_wait_for_idle amdgpu_create_bo_from_user_mem amdgpu_cs_create_semaphore @@ -45,6 +46,7 @@ amdgpu_query_heap_info amdgpu_query_hw_ip_count amdgpu_query_hw_ip_info amdgpu_query_info +amdgpu_query_sensor_info amdgpu_read_mm_registers amdgpu_va_range_alloc amdgpu_va_range_free diff --git a/lib/libdrm/amdgpu/amdgpu.h b/lib/libdrm/amdgpu/amdgpu.h index 7b26a04c9..55884b247 100644 --- a/lib/libdrm/amdgpu/amdgpu.h +++ b/lib/libdrm/amdgpu/amdgpu.h @@ -1058,6 +1058,24 @@ int amdgpu_query_info(amdgpu_device_handle dev, unsigned info_id, int amdgpu_query_gds_info(amdgpu_device_handle dev, struct amdgpu_gds_resource_info *gds_info); +/** + * Query information about a sensor. + * + * The return size is query-specific and depends on the "sensor_type" + * parameter. No more than "size" bytes are returned. + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param sensor_type - \c [in] AMDGPU_INFO_SENSOR_* + * \param size - \c [in] Size of the returned value. + * \param value - \c [out] Pointer to the return value. + * + * \return 0 on success\n + * <0 - Negative POSIX Error code + * +*/ +int amdgpu_query_sensor_info(amdgpu_device_handle dev, unsigned sensor_type, + unsigned size, void *value); + /** * Read a set of consecutive memory-mapped registers. * Not all registers are allowed to be read by userspace. @@ -1185,6 +1203,34 @@ int amdgpu_bo_va_op(amdgpu_bo_handle bo, uint64_t flags, uint32_t ops); +/** + * VA mapping/unmapping for a buffer object or PRT region. + * + * This is not a simple drop-in extension for amdgpu_bo_va_op; instead, all + * parameters are treated "raw", i.e. size is not automatically aligned, and + * all flags must be specified explicitly. + * + * \param dev - \c [in] device handle + * \param bo - \c [in] BO handle (may be NULL) + * \param offset - \c [in] Start offset to map + * \param size - \c [in] Size to map + * \param addr - \c [in] Start virtual address. + * \param flags - \c [in] Supported flags for mapping/unmapping + * \param ops - \c [in] AMDGPU_VA_OP_MAP or AMDGPU_VA_OP_UNMAP + * + * \return 0 on success\n + * <0 - Negative POSIX Error code + * +*/ + +int amdgpu_bo_va_op_raw(amdgpu_device_handle dev, + amdgpu_bo_handle bo, + uint64_t offset, + uint64_t size, + uint64_t addr, + uint64_t flags, + uint32_t ops); + /** * create semaphore *
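A quick usage sketch for the two entry points added above (not part of the patch: the device/BO/VA handles are assumed to already exist, and the millidegree-Celsius unit for AMDGPU_INFO_SENSOR_GPU_TEMP is an assumption about the kernel's reporting):

	#include <stdint.h>
	#include <stdio.h>
	#include "amdgpu.h"
	#include "amdgpu_drm.h"

	static int query_and_map(amdgpu_device_handle dev, amdgpu_bo_handle bo,
				 uint64_t va)
	{
		uint32_t temp = 0;
		int r;

		/* The kernel writes at most 'size' bytes into 'value'. */
		r = amdgpu_query_sensor_info(dev, AMDGPU_INFO_SENSOR_GPU_TEMP,
					     sizeof(temp), &temp);
		if (r == 0)
			printf("GPU temp: %u (assumed millidegrees C)\n", temp);

		/* Raw VA op: 'size' must already be aligned and every flag
		 * spelled out; nothing is defaulted as in amdgpu_bo_va_op(). */
		return amdgpu_bo_va_op_raw(dev, bo, 0, 0x10000, va,
					   AMDGPU_VM_PAGE_READABLE |
					   AMDGPU_VM_PAGE_WRITEABLE,
					   AMDGPU_VA_OP_MAP);
	}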
diff --git a/lib/libdrm/amdgpu/amdgpu_device.c b/lib/libdrm/amdgpu/amdgpu_device.c index f4ede0316..f473d2daf 100644 --- a/lib/libdrm/amdgpu/amdgpu_device.c +++ b/lib/libdrm/amdgpu/amdgpu_device.c @@ -131,10 +131,8 @@ static int amdgpu_get_auth(int fd, int *auth) static void amdgpu_device_free_internal(amdgpu_device_handle dev) { - amdgpu_vamgr_deinit(dev->vamgr); - free(dev->vamgr); - amdgpu_vamgr_deinit(dev->vamgr_32); - free(dev->vamgr_32); + amdgpu_vamgr_deinit(&dev->vamgr_32); + amdgpu_vamgr_deinit(&dev->vamgr); util_hash_table_destroy(dev->bo_flink_names); util_hash_table_destroy(dev->bo_handles); pthread_mutex_destroy(&dev->bo_table_mutex); @@ -255,25 +253,18 @@ int amdgpu_device_initialize(int fd, if (r) goto cleanup; - dev->vamgr = calloc(1, sizeof(struct amdgpu_bo_va_mgr)); - if (dev->vamgr == NULL) - goto cleanup; - - amdgpu_vamgr_init(dev->vamgr, dev->dev_info.virtual_address_offset, + amdgpu_vamgr_init(&dev->vamgr, dev->dev_info.virtual_address_offset, dev->dev_info.virtual_address_max, dev->dev_info.virtual_address_alignment); max = MIN2(dev->dev_info.virtual_address_max, 0xffffffff); - start = amdgpu_vamgr_find_va(dev->vamgr, + start = amdgpu_vamgr_find_va(&dev->vamgr, max - dev->dev_info.virtual_address_offset, dev->dev_info.virtual_address_alignment, 0); if (start > 0xffffffff) goto free_va; /* shouldn't get here */ - dev->vamgr_32 = calloc(1, sizeof(struct amdgpu_bo_va_mgr)); - if (dev->vamgr_32 == NULL) - goto free_va; - amdgpu_vamgr_init(dev->vamgr_32, start, max, + amdgpu_vamgr_init(&dev->vamgr_32, start, max, dev->dev_info.virtual_address_alignment); *major_version = dev->major_version; @@ -286,10 +277,9 @@ int amdgpu_device_initialize(int fd, free_va: r = -ENOMEM; - amdgpu_vamgr_free_va(dev->vamgr, start, + amdgpu_vamgr_free_va(&dev->vamgr, start, max - dev->dev_info.virtual_address_offset); - amdgpu_vamgr_deinit(dev->vamgr); - free(dev->vamgr); + amdgpu_vamgr_deinit(&dev->vamgr); cleanup: if (dev->fd >= 0) diff --git a/lib/libdrm/amdgpu/amdgpu_gpu_info.c b/lib/libdrm/amdgpu/amdgpu_gpu_info.c index 66c7e0e1b..f4b94c9ed 100644 --- a/lib/libdrm/amdgpu/amdgpu_gpu_info.c +++ b/lib/libdrm/amdgpu/amdgpu_gpu_info.c @@ -169,54 +169,58 @@ drm_private int amdgpu_query_gpu_info_init(amdgpu_device_handle dev) dev->info.vce_harvest_config = dev->dev_info.vce_harvest_config; dev->info.pci_rev_id = dev->dev_info.pci_rev; - for (i = 0; i < (int)dev->info.num_shader_engines; i++) { - unsigned instance = (i << AMDGPU_INFO_MMR_SE_INDEX_SHIFT) | - (AMDGPU_INFO_MMR_SH_INDEX_MASK << - AMDGPU_INFO_MMR_SH_INDEX_SHIFT); + if (dev->info.family_id < AMDGPU_FAMILY_AI) { + for (i = 0; i < (int)dev->info.num_shader_engines; i++) { + unsigned instance = (i << AMDGPU_INFO_MMR_SE_INDEX_SHIFT) | + (AMDGPU_INFO_MMR_SH_INDEX_MASK << + AMDGPU_INFO_MMR_SH_INDEX_SHIFT); - r = amdgpu_read_mm_registers(dev, 0x263d, 1, instance, 0, - &dev->info.backend_disable[i]); - if (r) - return r; - /* extract bitfield CC_RB_BACKEND_DISABLE.BACKEND_DISABLE */ - dev->info.backend_disable[i] = - (dev->info.backend_disable[i] >> 16) & 0xff; + r = amdgpu_read_mm_registers(dev, 0x263d, 1, instance, 0, + &dev->info.backend_disable[i]); +
if (r) + return r; + /* extract bitfield CC_RB_BACKEND_DISABLE.BACKEND_DISABLE */ + dev->info.backend_disable[i] = + (dev->info.backend_disable[i] >> 16) & 0xff; - r = amdgpu_read_mm_registers(dev, 0xa0d4, 1, instance, 0, - &dev->info.pa_sc_raster_cfg[i]); + r = amdgpu_read_mm_registers(dev, 0xa0d4, 1, instance, 0, + &dev->info.pa_sc_raster_cfg[i]); + if (r) + return r; + + if (dev->info.family_id >= AMDGPU_FAMILY_CI) { + r = amdgpu_read_mm_registers(dev, 0xa0d5, 1, instance, 0, + &dev->info.pa_sc_raster_cfg1[i]); + if (r) + return r; + } + } + } + + r = amdgpu_read_mm_registers(dev, 0x263e, 1, 0xffffffff, 0, + &dev->info.gb_addr_cfg); + if (r) + return r; + + if (dev->info.family_id < AMDGPU_FAMILY_AI) { + r = amdgpu_read_mm_registers(dev, 0x2644, 32, 0xffffffff, 0, + dev->info.gb_tile_mode); if (r) return r; if (dev->info.family_id >= AMDGPU_FAMILY_CI) { - r = amdgpu_read_mm_registers(dev, 0xa0d5, 1, instance, 0, - &dev->info.pa_sc_raster_cfg1[i]); + r = amdgpu_read_mm_registers(dev, 0x2664, 16, 0xffffffff, 0, + dev->info.gb_macro_tile_mode); if (r) return r; } - } - r = amdgpu_read_mm_registers(dev, 0x2644, 32, 0xffffffff, 0, - dev->info.gb_tile_mode); - if (r) - return r; - - if (dev->info.family_id >= AMDGPU_FAMILY_CI) { - r = amdgpu_read_mm_registers(dev, 0x2664, 16, 0xffffffff, 0, - dev->info.gb_macro_tile_mode); + r = amdgpu_read_mm_registers(dev, 0x9d8, 1, 0xffffffff, 0, + &dev->info.mc_arb_ramcfg); if (r) return r; } - r = amdgpu_read_mm_registers(dev, 0x263e, 1, 0xffffffff, 0, - &dev->info.gb_addr_cfg); - if (r) - return r; - - r = amdgpu_read_mm_registers(dev, 0x9d8, 1, 0xffffffff, 0, - &dev->info.mc_arb_ramcfg); - if (r) - return r; - dev->info.cu_active_number = dev->dev_info.cu_active_number; dev->info.cu_ao_mask = dev->dev_info.cu_ao_mask; memcpy(&dev->info.cu_bitmap[0][0], &dev->dev_info.cu_bitmap[0][0], sizeof(dev->info.cu_bitmap)); @@ -314,3 +318,18 @@ int amdgpu_query_gds_info(amdgpu_device_handle dev, return 0; } + +int amdgpu_query_sensor_info(amdgpu_device_handle dev, unsigned sensor_type, + unsigned size, void *value) +{ + struct drm_amdgpu_info request; + + memset(&request, 0, sizeof(request)); + request.return_pointer = (uintptr_t)value; + request.return_size = size; + request.query = AMDGPU_INFO_SENSOR; + request.sensor_info.type = sensor_type; + + return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request, + sizeof(struct drm_amdgpu_info)); +} diff --git a/lib/libdrm/amdgpu/amdgpu_internal.h b/lib/libdrm/amdgpu/amdgpu_internal.h index 4f039b68f..cf119a530 100644 --- a/lib/libdrm/amdgpu/amdgpu_internal.h +++ b/lib/libdrm/amdgpu/amdgpu_internal.h @@ -85,9 +85,9 @@ struct amdgpu_device { struct drm_amdgpu_info_device dev_info; struct amdgpu_gpu_info info; /** The global VA manager for the whole virtual address space */ - struct amdgpu_bo_va_mgr *vamgr; + struct amdgpu_bo_va_mgr vamgr; /** The VA manager for the 32bit address space */ - struct amdgpu_bo_va_mgr *vamgr_32; + struct amdgpu_bo_va_mgr vamgr_32; }; struct amdgpu_bo { diff --git a/lib/libdrm/amdgpu/amdgpu_vamgr.c b/lib/libdrm/amdgpu/amdgpu_vamgr.c index 8a707cbcd..2b1388edc 100644 --- a/lib/libdrm/amdgpu/amdgpu_vamgr.c +++ b/lib/libdrm/amdgpu/amdgpu_vamgr.c @@ -236,9 +236,9 @@ int amdgpu_va_range_alloc(amdgpu_device_handle dev, struct amdgpu_bo_va_mgr *vamgr; if (flags & AMDGPU_VA_RANGE_32_BIT) - vamgr = dev->vamgr_32; + vamgr = &dev->vamgr_32; else - vamgr = dev->vamgr; + vamgr = &dev->vamgr; va_base_alignment = MAX2(va_base_alignment, vamgr->va_alignment); size = ALIGN(size, vamgr->va_alignment); @@ 
-249,7 +249,7 @@ int amdgpu_va_range_alloc(amdgpu_device_handle dev, if (!(flags & AMDGPU_VA_RANGE_32_BIT) && (*va_base_allocated == AMDGPU_INVALID_VA_ADDRESS)) { /* fallback to 32bit address */ - vamgr = dev->vamgr_32; + vamgr = &dev->vamgr_32; *va_base_allocated = amdgpu_vamgr_find_va(vamgr, size, va_base_alignment, va_base_required); } diff --git a/lib/libdrm/etnaviv/etnaviv_gpu.c b/lib/libdrm/etnaviv/etnaviv_gpu.c index 35dec6cda..bc355e8fc 100644 --- a/lib/libdrm/etnaviv/etnaviv_gpu.c +++ b/lib/libdrm/etnaviv/etnaviv_gpu.c @@ -61,32 +61,13 @@ struct etna_gpu *etna_gpu_new(struct etna_device *dev, unsigned int core) gpu->dev = dev; gpu->core = core; - /* get specs from kernel space */ - gpu->specs.model = get_param(dev, core, ETNAVIV_PARAM_GPU_MODEL); - gpu->specs.revision = get_param(dev, core, ETNAVIV_PARAM_GPU_REVISION); - gpu->specs.features[0] = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_0); - gpu->specs.features[1] = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_1); - gpu->specs.features[2] = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_2); - gpu->specs.features[3] = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_3); - gpu->specs.features[4] = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_4); - gpu->specs.features[5] = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_5); - gpu->specs.features[6] = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_6); - gpu->specs.stream_count = get_param(dev, core, ETNA_GPU_STREAM_COUNT); - gpu->specs.register_max = get_param(dev, core, ETNA_GPU_REGISTER_MAX); - gpu->specs.thread_count = get_param(dev, core, ETNA_GPU_THREAD_COUNT); - gpu->specs.vertex_cache_size = get_param(dev, core, ETNA_GPU_VERTEX_CACHE_SIZE); - gpu->specs.shader_core_count = get_param(dev, core, ETNA_GPU_SHADER_CORE_COUNT); - gpu->specs.pixel_pipes = get_param(dev, core, ETNA_GPU_PIXEL_PIPES); - gpu->specs.vertex_output_buffer_size = get_param(dev, core, ETNA_GPU_VERTEX_OUTPUT_BUFFER_SIZE); - gpu->specs.buffer_size = get_param(dev, core, ETNA_GPU_BUFFER_SIZE); - gpu->specs.instruction_count = get_param(dev, core, ETNA_GPU_INSTRUCTION_COUNT); - gpu->specs.num_constants = get_param(dev, core, ETNA_GPU_NUM_CONSTANTS); - gpu->specs.num_varyings = get_param(dev, core, ETNA_GPU_NUM_VARYINGS); + gpu->model = get_param(dev, core, ETNAVIV_PARAM_GPU_MODEL); + gpu->revision = get_param(dev, core, ETNAVIV_PARAM_GPU_REVISION); - if (!gpu->specs.model) + if (!gpu->model) goto fail; - INFO_MSG(" GPU model: 0x%x (rev %x)", gpu->specs.model, gpu->specs.revision); + INFO_MSG(" GPU model: 0x%x (rev %x)", gpu->model, gpu->revision); return gpu; fail: @@ -104,66 +85,69 @@ void etna_gpu_del(struct etna_gpu *gpu) int etna_gpu_get_param(struct etna_gpu *gpu, enum etna_param_id param, uint64_t *value) { + struct etna_device *dev = gpu->dev; + unsigned int core = gpu->core; + switch(param) { case ETNA_GPU_MODEL: - *value = gpu->specs.model; + *value = gpu->model; return 0; case ETNA_GPU_REVISION: - *value = gpu->specs.revision; + *value = gpu->revision; return 0; case ETNA_GPU_FEATURES_0: - *value = gpu->specs.features[0]; + *value = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_0); return 0; case ETNA_GPU_FEATURES_1: - *value = gpu->specs.features[1]; + *value = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_1); return 0; case ETNA_GPU_FEATURES_2: - *value = gpu->specs.features[2]; + *value = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_2); return 0; case ETNA_GPU_FEATURES_3: - *value = gpu->specs.features[3]; + *value = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_3); return 0; 
case ETNA_GPU_FEATURES_4: - *value = gpu->specs.features[4]; + *value = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_4); return 0; case ETNA_GPU_FEATURES_5: - *value = gpu->specs.features[5]; + *value = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_5); return 0; case ETNA_GPU_FEATURES_6: - *value = gpu->specs.features[6]; + *value = get_param(dev, core, ETNAVIV_PARAM_GPU_FEATURES_6); return 0; case ETNA_GPU_STREAM_COUNT: - *value = gpu->specs.stream_count; + *value = get_param(dev, core, ETNA_GPU_STREAM_COUNT); return 0; case ETNA_GPU_REGISTER_MAX: - *value = gpu->specs.register_max; + *value = get_param(dev, core, ETNA_GPU_REGISTER_MAX); return 0; case ETNA_GPU_THREAD_COUNT: - *value = gpu->specs.thread_count; + *value = get_param(dev, core, ETNA_GPU_THREAD_COUNT); return 0; case ETNA_GPU_VERTEX_CACHE_SIZE: - *value = gpu->specs.vertex_cache_size; + *value = get_param(dev, core, ETNA_GPU_VERTEX_CACHE_SIZE); return 0; case ETNA_GPU_SHADER_CORE_COUNT: - *value = gpu->specs.shader_core_count; + *value = get_param(dev, core, ETNA_GPU_SHADER_CORE_COUNT); return 0; case ETNA_GPU_PIXEL_PIPES: - *value = gpu->specs.pixel_pipes; + *value = get_param(dev, core, ETNA_GPU_PIXEL_PIPES); return 0; case ETNA_GPU_VERTEX_OUTPUT_BUFFER_SIZE: - *value = gpu->specs.vertex_output_buffer_size; + *value = get_param(dev, core, ETNA_GPU_VERTEX_OUTPUT_BUFFER_SIZE); return 0; case ETNA_GPU_BUFFER_SIZE: - *value = gpu->specs.buffer_size; + *value = get_param(dev, core, ETNA_GPU_BUFFER_SIZE); return 0; case ETNA_GPU_INSTRUCTION_COUNT: - *value = gpu->specs.instruction_count; + *value = get_param(dev, core, ETNA_GPU_INSTRUCTION_COUNT); return 0; case ETNA_GPU_NUM_CONSTANTS: - *value = gpu->specs.num_constants; + *value = get_param(dev, core, ETNA_GPU_NUM_CONSTANTS); return 0; case ETNA_GPU_NUM_VARYINGS: - *value = gpu->specs.num_varyings; + *value = get_param(dev, core, ETNA_GPU_NUM_VARYINGS); return 0; default: diff --git a/lib/libdrm/etnaviv/etnaviv_priv.h b/lib/libdrm/etnaviv/etnaviv_priv.h index feaa5ad99..1334ba3f0 100644 --- a/lib/libdrm/etnaviv/etnaviv_priv.h +++ b/lib/libdrm/etnaviv/etnaviv_priv.h @@ -47,25 +47,6 @@ #include "etnaviv_drmif.h" #include "etnaviv_drm.h" -#define VIV_FEATURES_WORD_COUNT 7 - -struct etna_specs { - uint32_t model; - uint32_t revision; - uint32_t features[VIV_FEATURES_WORD_COUNT]; - uint32_t stream_count; - uint32_t register_max; - uint32_t thread_count; - uint32_t shader_core_count; - uint32_t vertex_cache_size; - uint32_t vertex_output_buffer_size; - uint32_t pixel_pipes; - uint32_t instruction_count; - uint32_t num_constants; - uint32_t num_varyings; - uint32_t buffer_size; -}; - struct etna_bo_bucket { uint32_t size; struct list_head list; @@ -134,8 +115,9 @@ struct etna_bo { struct etna_gpu { struct etna_device *dev; - struct etna_specs specs; uint32_t core; + uint32_t model; + uint32_t revision; }; struct etna_pipe { diff --git a/lib/libdrm/exynos/exynos_drm.c b/lib/libdrm/exynos/exynos_drm.c index b961e5207..f6204f1c2 100644 --- a/lib/libdrm/exynos/exynos_drm.c +++ b/lib/libdrm/exynos/exynos_drm.c @@ -417,7 +417,7 @@ exynos_handle_event(struct exynos_device *dev, struct exynos_event_context *ctx) i = 0; while (i < len) { - e = (struct drm_event *) &buffer[i]; + e = (struct drm_event *)(buffer + i); switch (e->type) { case DRM_EVENT_VBLANK: if (evctx->version < 1 || diff --git a/lib/libdrm/exynos/exynos_fimg2d.c b/lib/libdrm/exynos/exynos_fimg2d.c index 7f1d105a5..61340c36c 100644 --- a/lib/libdrm/exynos/exynos_fimg2d.c +++ b/lib/libdrm/exynos/exynos_fimg2d.c @@ -292,20 
+292,6 @@ static void g2d_set_direction(struct g2d_context *ctx, g2d_add_cmd(ctx, DST_PAT_DIRECT_REG, dir->val[1]); } -/* - * g2d_reset - reset fimg2d hardware. - * - * @ctx: a pointer to g2d_context structure. - * - */ -static void g2d_reset(struct g2d_context *ctx) -{ - ctx->cmd_nr = 0; - ctx->cmd_buf_nr = 0; - - g2d_add_cmd(ctx, SOFT_RESET_REG, 0x01); -} - /* * g2d_flush - submit all commands and values in user side command buffer * to command queue aware of fimg2d dma. diff --git a/lib/libdrm/freedreno/Makefile.am b/lib/libdrm/freedreno/Makefile.am index 0771d146b..cbb0d0318 100644 --- a/lib/libdrm/freedreno/Makefile.am +++ b/lib/libdrm/freedreno/Makefile.am @@ -5,6 +5,7 @@ AM_CFLAGS = \ $(WARN_CFLAGS) \ -I$(top_srcdir) \ $(PTHREADSTUBS_CFLAGS) \ + $(VALGRIND_CFLAGS) \ -I$(top_srcdir)/include/drm libdrm_freedreno_la_LTLIBRARIES = libdrm_freedreno.la diff --git a/lib/libdrm/freedreno/freedreno_bo.c b/lib/libdrm/freedreno/freedreno_bo.c index 996d6b95c..10949ebf0 100644 --- a/lib/libdrm/freedreno/freedreno_bo.c +++ b/lib/libdrm/freedreno/freedreno_bo.c @@ -102,6 +102,8 @@ fd_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags) bo->bo_reuse = TRUE; pthread_mutex_unlock(&table_lock); + VG_BO_ALLOC(bo); + return bo; } @@ -118,6 +120,8 @@ fd_bo_from_handle(struct fd_device *dev, uint32_t handle, uint32_t size) bo = bo_from_handle(dev, size, handle); + VG_BO_ALLOC(bo); + out_unlock: pthread_mutex_unlock(&table_lock); @@ -147,6 +151,8 @@ fd_bo_from_dmabuf(struct fd_device *dev, int fd) bo = bo_from_handle(dev, size, handle); + VG_BO_ALLOC(bo); + out_unlock: pthread_mutex_unlock(&table_lock); @@ -177,8 +183,10 @@ struct fd_bo * fd_bo_from_name(struct fd_device *dev, uint32_t name) goto out_unlock; bo = bo_from_handle(dev, req.size, req.handle); - if (bo) + if (bo) { set_name(bo, name); + VG_BO_ALLOC(bo); + } out_unlock: pthread_mutex_unlock(&table_lock); @@ -213,6 +221,8 @@ out: /* Called under table_lock */ drm_private void bo_del(struct fd_bo *bo) { + VG_BO_FREE(bo); + if (bo->map) drm_munmap(bo->map, bo->size); diff --git a/lib/libdrm/freedreno/freedreno_bo_cache.c b/lib/libdrm/freedreno/freedreno_bo_cache.c index 7becb0d64..d922f3a90 100644 --- a/lib/libdrm/freedreno/freedreno_bo_cache.c +++ b/lib/libdrm/freedreno/freedreno_bo_cache.c @@ -33,7 +33,6 @@ #include "freedreno_drmif.h" #include "freedreno_priv.h" - drm_private void bo_del(struct fd_bo *bo); drm_private extern pthread_mutex_t table_lock; @@ -102,6 +101,7 @@ fd_bo_cache_cleanup(struct fd_bo_cache *cache, time_t time) if (time && ((time - bo->free_time) <= 1)) break; + VG_BO_OBTAIN(bo); list_del(&bo->list); bo_del(bo); } @@ -177,6 +177,7 @@ retry: *size = bucket->size; bo = find_in_bucket(bucket, flags); if (bo) { + VG_BO_OBTAIN(bo); if (bo->funcs->madvise(bo, TRUE) <= 0) { /* we've lost the backing pages, delete and try again: */ pthread_mutex_lock(&table_lock); @@ -207,6 +208,7 @@ fd_bo_cache_free(struct fd_bo_cache *cache, struct fd_bo *bo) clock_gettime(CLOCK_MONOTONIC, &time); bo->free_time = time.tv_sec; + VG_BO_RELEASE(bo); list_addtail(&bo->list, &bucket->list); fd_bo_cache_cleanup(cache, time.tv_sec); diff --git a/lib/libdrm/freedreno/freedreno_device.c b/lib/libdrm/freedreno/freedreno_device.c index fcbf1402a..dba7ec47f 100644 --- a/lib/libdrm/freedreno/freedreno_device.c +++ b/lib/libdrm/freedreno/freedreno_device.c @@ -115,9 +115,10 @@ static void fd_device_del_impl(struct fd_device *dev) fd_bo_cache_cleanup(&dev->bo_cache, 0); drmHashDestroy(dev->handle_table); drmHashDestroy(dev->name_table); + 
dev->funcs->destroy(dev); if (dev->closefd) close(dev->fd); - dev->funcs->destroy(dev); + free(dev); } drm_private void fd_device_del_locked(struct fd_device *dev) diff --git a/lib/libdrm/freedreno/freedreno_priv.h b/lib/libdrm/freedreno/freedreno_priv.h index 32170391a..8dd3ee694 100644 --- a/lib/libdrm/freedreno/freedreno_priv.h +++ b/lib/libdrm/freedreno/freedreno_priv.h @@ -102,6 +102,9 @@ struct fd_device { struct fd_bo_cache bo_cache; int closefd; /* call close(fd) upon destruction */ + + /* just for valgrind: */ + int bo_size; }; drm_private void fd_bo_cache_init(struct fd_bo_cache *cache, int coarse); @@ -196,4 +199,57 @@ offset_bytes(void *end, void *start) return ((char *)end) - ((char *)start); } +#ifdef HAVE_VALGRIND +# include <memcheck.h> + +/* + * For tracking the backing memory (if valgrind enabled, we force a mmap + * for the purposes of tracking) + */ +static inline void VG_BO_ALLOC(struct fd_bo *bo) +{ + if (bo && RUNNING_ON_VALGRIND) { + VALGRIND_MALLOCLIKE_BLOCK(fd_bo_map(bo), bo->size, 0, 1); + } +} + +static inline void VG_BO_FREE(struct fd_bo *bo) +{ + VALGRIND_FREELIKE_BLOCK(bo->map, 0); +} + +/* + * For tracking bo structs that are in the buffer-cache, so that valgrind + * doesn't attribute ownership to the first one to allocate the recycled + * bo. + * + * Note that the list_head in fd_bo is used to track the buffers in cache + * so disable error reporting on the range while they are in cache so + * valgrind doesn't squawk about list traversal. + * + */ +static inline void VG_BO_RELEASE(struct fd_bo *bo) +{ + if (RUNNING_ON_VALGRIND) { + VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(bo, bo->dev->bo_size); + VALGRIND_MAKE_MEM_NOACCESS(bo, bo->dev->bo_size); + VALGRIND_FREELIKE_BLOCK(bo->map, 0); + } +} +static inline void VG_BO_OBTAIN(struct fd_bo *bo) +{ + if (RUNNING_ON_VALGRIND) { + VALGRIND_MAKE_MEM_DEFINED(bo, bo->dev->bo_size); + VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(bo, bo->dev->bo_size); + VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, 1); + } +} +#else +static inline void VG_BO_ALLOC(struct fd_bo *bo) {} +static inline void VG_BO_FREE(struct fd_bo *bo) {} +static inline void VG_BO_RELEASE(struct fd_bo *bo) {} +static inline void VG_BO_OBTAIN(struct fd_bo *bo) {} +#endif + + #endif /* FREEDRENO_PRIV_H_ */
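The VG_BO_* helpers above wrap valgrind's heap client requests around the BO cache. A minimal standalone sketch of the MALLOCLIKE/FREELIKE pairing they rely on, assuming the valgrind headers are installed (memcheck.h additionally supplies the VALGRIND_MAKE_MEM_* and address-range requests used by VG_BO_RELEASE/VG_BO_OBTAIN):

	#include <sys/mman.h>
	#include <valgrind/valgrind.h>

	int main(void)
	{
		void *map = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (map == MAP_FAILED)
			return 1;

		if (RUNNING_ON_VALGRIND) {
			/* Track the mapping as if it were a heap block, so
			 * leaks and stale accesses get a sensible call site. */
			VALGRIND_MALLOCLIKE_BLOCK(map, 4096, 0, 1);
			/* ... use the buffer ... */
			VALGRIND_FREELIKE_BLOCK(map, 0);
		}
		munmap(map, 4096);
		return 0;
	}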
diff --git a/lib/libdrm/freedreno/kgsl/kgsl_device.c b/lib/libdrm/freedreno/kgsl/kgsl_device.c index 175e83781..958e8a728 100644 --- a/lib/libdrm/freedreno/kgsl/kgsl_device.c +++ b/lib/libdrm/freedreno/kgsl/kgsl_device.c @@ -61,5 +61,7 @@ drm_private struct fd_device * kgsl_device_new(int fd) dev = &kgsl_dev->base; dev->funcs = &funcs; + dev->bo_size = sizeof(struct kgsl_bo); + return dev; } diff --git a/lib/libdrm/freedreno/msm/msm_device.c b/lib/libdrm/freedreno/msm/msm_device.c index 727baa443..c454938d4 100644 --- a/lib/libdrm/freedreno/msm/msm_device.c +++ b/lib/libdrm/freedreno/msm/msm_device.c @@ -64,5 +64,7 @@ drm_private struct fd_device * msm_device_new(int fd) fd_bo_cache_init(&msm_dev->ring_cache, TRUE); + dev->bo_size = sizeof(struct msm_bo); + return dev; } diff --git a/lib/libdrm/freedreno/msm/msm_ringbuffer.c b/lib/libdrm/freedreno/msm/msm_ringbuffer.c index 17194f4cc..c3b2ededf 100644 --- a/lib/libdrm/freedreno/msm/msm_ringbuffer.c +++ b/lib/libdrm/freedreno/msm/msm_ringbuffer.c @@ -496,11 +496,16 @@ static void msm_ringbuffer_emit_reloc(struct fd_ringbuffer *ring, if (ring->pipe->gpu_id >= 500) { struct drm_msm_gem_submit_reloc *reloc_hi; + /* NOTE: grab reloc_idx *before* APPEND() since that could + * realloc() meaning that 'reloc' ptr is no longer valid: + */ + uint32_t reloc_idx = reloc->reloc_idx; + idx = APPEND(cmd, relocs); reloc_hi = &cmd->relocs[idx]; - reloc_hi->reloc_idx = reloc->reloc_idx; + reloc_hi->reloc_idx = reloc_idx; reloc_hi->reloc_offset = r->offset; reloc_hi->or = r->orhi; reloc_hi->shift = r->shift - 32; diff --git a/lib/libdrm/include/drm/README b/lib/libdrm/include/drm/README index a50b02c0a..870b0b5b6 100644 --- a/lib/libdrm/include/drm/README +++ b/lib/libdrm/include/drm/README @@ -67,6 +67,8 @@ That said, it's up-to the individual developers to sync with newer version When and how to update these files ---------------------------------- +Note: One should not make _any_ changes to the files apart from the steps below. + In order to update the files do the following: - Switch to a Linux kernel tree/branch which is not rebased. For example: airlied/drm-next @@ -94,10 +96,6 @@ Status: ? Promote to fixed size ints, which match the current (32bit) ones. -amdgpu_drm.h - - Using the stdint.h uint*_t over the respective __u* ones -Status: Trivial. drm_mode.h - Missing DPI encode/connector pair. Status: Trivial. diff --git a/lib/libdrm/include/drm/amdgpu_drm.h b/lib/libdrm/include/drm/amdgpu_drm.h index d8f249766..516a9f285 100644 --- a/lib/libdrm/include/drm/amdgpu_drm.h +++ b/lib/libdrm/include/drm/amdgpu_drm.h @@ -50,6 +50,7 @@ extern "C" { #define DRM_AMDGPU_WAIT_CS 0x09 #define DRM_AMDGPU_GEM_OP 0x10 #define DRM_AMDGPU_GEM_USERPTR 0x11 +#define DRM_AMDGPU_WAIT_FENCES 0x12 #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -63,6 +64,7 @@ extern "C" { #define DRM_IOCTL_AMDGPU_WAIT_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs) #define DRM_IOCTL_AMDGPU_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op) #define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) +#define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 @@ -79,22 +81,26 @@ extern "C" { #define AMDGPU_GEM_CREATE_CPU_GTT_USWC (1 << 2) /* Flag that the memory should be in VRAM and cleared */ #define AMDGPU_GEM_CREATE_VRAM_CLEARED (1 << 3) +/* Flag that create shadow bo(GTT) while allocating vram bo */ +#define AMDGPU_GEM_CREATE_SHADOW (1 << 4) +/* Flag that allocating the BO should use linear VRAM */ +#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) struct drm_amdgpu_gem_create_in { /** the requested memory size */ - uint64_t bo_size; + __u64 bo_size; /** physical start_addr alignment in bytes for some HW requirements */ - uint64_t alignment; + __u64 alignment; /** the requested memory domains */ - uint64_t domains; + __u64 domains; /** allocation flags */ - uint64_t domain_flags; + __u64 domain_flags; }; struct drm_amdgpu_gem_create_out { /** returned GEM object handle */ - uint32_t handle; - uint32_t _pad; + __u32 handle; + __u32 _pad; }; union drm_amdgpu_gem_create { @@ -111,28 +117,28 @@ union drm_amdgpu_gem_create { struct drm_amdgpu_bo_list_in { /** Type of operation */ - uint32_t operation; + __u32 operation; /** Handle of list or 0 if we want to create one */ - uint32_t list_handle; + __u32 list_handle; /** Number of BOs in list */ - uint32_t bo_number; + __u32 bo_number; /** Size of each element describing BO */ -
uint32_t bo_info_size; + __u32 bo_info_size; /** Pointer to array describing BOs */ - uint64_t bo_info_ptr; + __u64 bo_info_ptr; }; struct drm_amdgpu_bo_list_entry { /** Handle of BO */ - uint32_t bo_handle; + __u32 bo_handle; /** New (if specified) BO priority to be used during migration */ - uint32_t bo_priority; + __u32 bo_priority; }; struct drm_amdgpu_bo_list_out { /** Handle of resource list */ - uint32_t list_handle; - uint32_t _pad; + __u32 list_handle; + __u32 _pad; }; union drm_amdgpu_bo_list { @@ -156,26 +162,26 @@ union drm_amdgpu_bo_list { struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ - uint32_t op; + __u32 op; /** For future use, no flags defined so far */ - uint32_t flags; - uint32_t ctx_id; - uint32_t _pad; + __u32 flags; + __u32 ctx_id; + __u32 _pad; }; union drm_amdgpu_ctx_out { struct { - uint32_t ctx_id; - uint32_t _pad; + __u32 ctx_id; + __u32 _pad; } alloc; struct { /** For future use, no flags defined so far */ - uint64_t flags; + __u64 flags; /** Number of resets caused by this context so far. */ - uint32_t hangs; + __u32 hangs; /** Reset status since the last call of the ioctl. */ - uint32_t reset_status; + __u32 reset_status; } state; }; @@ -195,14 +201,15 @@ union drm_amdgpu_ctx { #define AMDGPU_GEM_USERPTR_REGISTER (1 << 3) struct drm_amdgpu_gem_userptr { - uint64_t addr; - uint64_t size; + __u64 addr; + __u64 size; /* AMDGPU_GEM_USERPTR_* */ - uint32_t flags; + __u32 flags; /* Resulting GEM handle */ - uint32_t handle; + __u32 handle; }; +/* SI-CI-VI: */ /* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */ #define AMDGPU_TILING_ARRAY_MODE_SHIFT 0 #define AMDGPU_TILING_ARRAY_MODE_MASK 0xf @@ -221,10 +228,15 @@ struct drm_amdgpu_gem_userptr { #define AMDGPU_TILING_NUM_BANKS_SHIFT 21 #define AMDGPU_TILING_NUM_BANKS_MASK 0x3 +/* GFX9 and later: */ +#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0 +#define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f + +/* Set/Get helpers for tiling flags. 
*/ #define AMDGPU_TILING_SET(field, value) \ - (((value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT) + (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT) #define AMDGPU_TILING_GET(value, field) \ - (((value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK) + (((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK) #define AMDGPU_GEM_METADATA_OP_SET_METADATA 1 #define AMDGPU_GEM_METADATA_OP_GET_METADATA 2 @@ -232,28 +244,28 @@ struct drm_amdgpu_gem_userptr { /** The same structure is shared for input/output */ struct drm_amdgpu_gem_metadata { /** GEM Object handle */ - uint32_t handle; + __u32 handle; /** Do we want get or set metadata */ - uint32_t op; + __u32 op; struct { /** For future use, no flags defined so far */ - uint64_t flags; + __u64 flags; /** family specific tiling info */ - uint64_t tiling_info; - uint32_t data_size_bytes; - uint32_t data[64]; + __u64 tiling_info; + __u32 data_size_bytes; + __u32 data[64]; } data; }; struct drm_amdgpu_gem_mmap_in { /** the GEM object handle */ - uint32_t handle; - uint32_t _pad; + __u32 handle; + __u32 _pad; }; struct drm_amdgpu_gem_mmap_out { /** mmap offset from the vma offset manager */ - uint64_t addr_ptr; + __u64 addr_ptr; }; union drm_amdgpu_gem_mmap { @@ -263,18 +275,18 @@ union drm_amdgpu_gem_mmap { struct drm_amdgpu_gem_wait_idle_in { /** GEM object handle */ - uint32_t handle; + __u32 handle; /** For future use, no flags defined so far */ - uint32_t flags; + __u32 flags; /** Absolute timeout to wait */ - uint64_t timeout; + __u64 timeout; }; struct drm_amdgpu_gem_wait_idle_out { /** BO status: 0 - BO is idle, 1 - BO is busy */ - uint32_t status; + __u32 status; /** Returned current memory domain */ - uint32_t domain; + __u32 domain; }; union drm_amdgpu_gem_wait_idle { @@ -284,18 +296,18 @@ union drm_amdgpu_gem_wait_idle { struct drm_amdgpu_wait_cs_in { /** Command submission handle */ - uint64_t handle; + __u64 handle; /** Absolute timeout to wait */ - uint64_t timeout; - uint32_t ip_type; - uint32_t ip_instance; - uint32_t ring; - uint32_t ctx_id; + __u64 timeout; + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u32 ctx_id; }; struct drm_amdgpu_wait_cs_out { /** CS status: 0 - CS completed, 1 - CS still busy */ - uint64_t status; + __u64 status; }; union drm_amdgpu_wait_cs { @@ -303,21 +315,49 @@ union drm_amdgpu_wait_cs { struct drm_amdgpu_wait_cs_out out; }; +struct drm_amdgpu_fence { + __u32 ctx_id; + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u64 seq_no; +}; + +struct drm_amdgpu_wait_fences_in { + /** This points to uint64_t * which points to fences */ + __u64 fences; + __u32 fence_count; + __u32 wait_all; + __u64 timeout_ns; +}; + +struct drm_amdgpu_wait_fences_out { + __u32 status; + __u32 first_signaled; +}; + +union drm_amdgpu_wait_fences { + struct drm_amdgpu_wait_fences_in in; + struct drm_amdgpu_wait_fences_out out; +}; + #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0 #define AMDGPU_GEM_OP_SET_PLACEMENT 1 /* Sets or returns a value associated with a buffer. 
*/ struct drm_amdgpu_gem_op { /** GEM object handle */ - uint32_t handle; + __u32 handle; /** AMDGPU_GEM_OP_* */ - uint32_t op; + __u32 op; /** Input or return value */ - uint64_t value; + __u64 value; }; #define AMDGPU_VA_OP_MAP 1 #define AMDGPU_VA_OP_UNMAP 2 +#define AMDGPU_VA_OP_CLEAR 3 +#define AMDGPU_VA_OP_REPLACE 4 /* Delay the page table update till the next CS */ #define AMDGPU_VM_DELAY_UPDATE (1 << 0) @@ -329,21 +369,35 @@ struct drm_amdgpu_gem_op { #define AMDGPU_VM_PAGE_WRITEABLE (1 << 2) /* executable mapping, new for VI */ #define AMDGPU_VM_PAGE_EXECUTABLE (1 << 3) +/* partially resident texture */ +#define AMDGPU_VM_PAGE_PRT (1 << 4) +/* MTYPE flags use bit 5 to 8 */ +#define AMDGPU_VM_MTYPE_MASK (0xf << 5) +/* Default MTYPE. Pre-AI must use this. Recommended for newer ASICs. */ +#define AMDGPU_VM_MTYPE_DEFAULT (0 << 5) +/* Use NC MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_NC (1 << 5) +/* Use WC MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_WC (2 << 5) +/* Use CC MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_CC (3 << 5) +/* Use UC MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_UC (4 << 5) struct drm_amdgpu_gem_va { /** GEM object handle */ - uint32_t handle; - uint32_t _pad; + __u32 handle; + __u32 _pad; /** AMDGPU_VA_OP_* */ - uint32_t operation; + __u32 operation; /** AMDGPU_VM_PAGE_* */ - uint32_t flags; + __u32 flags; /** va address to assign . Must be correctly aligned.*/ - uint64_t va_address; + __u64 va_address; /** Specify offset inside of BO to assign. Must be correctly aligned.*/ - uint64_t offset_in_bo; + __u64 offset_in_bo; /** Specify mapping size. Must be correctly aligned. */ - uint64_t map_size; + __u64 map_size; }; #define AMDGPU_HW_IP_GFX 0 @@ -351,7 +405,8 @@ struct drm_amdgpu_gem_va { #define AMDGPU_HW_IP_DMA 2 #define AMDGPU_HW_IP_UVD 3 #define AMDGPU_HW_IP_VCE 4 -#define AMDGPU_HW_IP_NUM 5 +#define AMDGPU_HW_IP_UVD_ENC 5 +#define AMDGPU_HW_IP_NUM 6 #define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1 @@ -360,24 +415,24 @@ struct drm_amdgpu_gem_va { #define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 struct drm_amdgpu_cs_chunk { - uint32_t chunk_id; - uint32_t length_dw; - uint64_t chunk_data; + __u32 chunk_id; + __u32 length_dw; + __u64 chunk_data; }; struct drm_amdgpu_cs_in { /** Rendering context id */ - uint32_t ctx_id; + __u32 ctx_id; /** Handle of resource list associated with CS */ - uint32_t bo_list_handle; - uint32_t num_chunks; - uint32_t _pad; - /** this points to uint64_t * which point to cs chunks */ - uint64_t chunks; + __u32 bo_list_handle; + __u32 num_chunks; + __u32 _pad; + /** this points to __u64 * which point to cs chunks */ + __u64 chunks; }; struct drm_amdgpu_cs_out { - uint64_t handle; + __u64 handle; }; union drm_amdgpu_cs { @@ -390,36 +445,39 @@ union drm_amdgpu_cs { /* This IB should be submitted to CE */ #define AMDGPU_IB_FLAG_CE (1<<0) -/* CE Preamble */ +/* Preamble flag, which means the IB could be dropped if no context switch */ #define AMDGPU_IB_FLAG_PREAMBLE (1<<1) +/* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */ +#define AMDGPU_IB_FLAG_PREEMPT (1<<2) + struct drm_amdgpu_cs_chunk_ib { - uint32_t _pad; + __u32 _pad; /** AMDGPU_IB_FLAG_* */ - uint32_t flags; + __u32 flags; /** Virtual address to begin IB execution */ - uint64_t va_start; + __u64 va_start; /** Size of submission */ - uint32_t ib_bytes; + __u32 ib_bytes; /** HW IP to submit to */ - uint32_t ip_type; + __u32 ip_type; /** HW IP index of the same type to submit to */ - uint32_t ip_instance; + __u32 
ip_instance; /** Ring index to submit to */ - uint32_t ring; + __u32 ring; }; struct drm_amdgpu_cs_chunk_dep { - uint32_t ip_type; - uint32_t ip_instance; - uint32_t ring; - uint32_t ctx_id; - uint64_t handle; + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u32 ctx_id; + __u64 handle; }; struct drm_amdgpu_cs_chunk_fence { - uint32_t handle; - uint32_t offset; + __u32 handle; + __u32 offset; }; struct drm_amdgpu_cs_chunk_data { @@ -434,6 +492,7 @@ struct drm_amdgpu_cs_chunk_data { * */ #define AMDGPU_IDS_FLAGS_FUSION 0x1 +#define AMDGPU_IDS_FLAGS_PREEMPTION 0x2 /* indicate if acceleration can be working */ #define AMDGPU_INFO_ACCEL_WORKING 0x00 @@ -467,6 +526,10 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_FW_SMC 0x0a /* Subquery id: Query SDMA firmware version */ #define AMDGPU_INFO_FW_SDMA 0x0b + /* Subquery id: Query PSP SOS firmware version */ + #define AMDGPU_INFO_FW_SOS 0x0c + /* Subquery id: Query PSP ASD firmware version */ + #define AMDGPU_INFO_FW_ASD 0x0d /* number of bytes moved for TTM migration */ #define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f /* the used VRAM size */ @@ -483,6 +546,36 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_DEV_INFO 0x16 /* visible vram usage */ #define AMDGPU_INFO_VIS_VRAM_USAGE 0x17 +/* number of TTM buffer evictions */ +#define AMDGPU_INFO_NUM_EVICTIONS 0x18 +/* Query memory about VRAM and GTT domains */ +#define AMDGPU_INFO_MEMORY 0x19 +/* Query vce clock table */ +#define AMDGPU_INFO_VCE_CLOCK_TABLE 0x1A +/* Query vbios related information */ +#define AMDGPU_INFO_VBIOS 0x1B + /* Subquery id: Query vbios size */ + #define AMDGPU_INFO_VBIOS_SIZE 0x1 + /* Subquery id: Query vbios image */ + #define AMDGPU_INFO_VBIOS_IMAGE 0x2 +/* Query UVD handles */ +#define AMDGPU_INFO_NUM_HANDLES 0x1C +/* Query sensor related information */ +#define AMDGPU_INFO_SENSOR 0x1D + /* Subquery id: Query GPU shader clock */ + #define AMDGPU_INFO_SENSOR_GFX_SCLK 0x1 + /* Subquery id: Query GPU memory clock */ + #define AMDGPU_INFO_SENSOR_GFX_MCLK 0x2 + /* Subquery id: Query GPU temperature */ + #define AMDGPU_INFO_SENSOR_GPU_TEMP 0x3 + /* Subquery id: Query GPU load */ + #define AMDGPU_INFO_SENSOR_GPU_LOAD 0x4 + /* Subquery id: Query average GPU power */ + #define AMDGPU_INFO_SENSOR_GPU_AVG_POWER 0x5 + /* Subquery id: Query northbridge voltage */ + #define AMDGPU_INFO_SENSOR_VDDNB 0x6 + /* Subquery id: Query graphics voltage */ + #define AMDGPU_INFO_SENSOR_VDDGFX 0x7 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff @@ -491,86 +584,123 @@ struct drm_amdgpu_cs_chunk_data { struct drm_amdgpu_query_fw { /** AMDGPU_INFO_FW_* */ - uint32_t fw_type; + __u32 fw_type; /** * Index of the IP if there are more IPs of * the same type. */ - uint32_t ip_instance; + __u32 ip_instance; /** * Index of the engine. Whether this is used depends * on the firmware type. (e.g. MEC, SDMA) */ - uint32_t index; - uint32_t _pad; + __u32 index; + __u32 _pad; }; /* Input structure for the INFO ioctl */ struct drm_amdgpu_info { /* Where the return value will be stored */ - uint64_t return_pointer; + __u64 return_pointer; /* The size of the return value. Just like "size" in "snprintf", * it limits how many bytes the kernel can write. */ - uint32_t return_size; + __u32 return_size; /* The query request id. 
*/ - uint32_t query; + __u32 query; union { struct { - uint32_t id; - uint32_t _pad; + __u32 id; + __u32 _pad; } mode_crtc; struct { /** AMDGPU_HW_IP_* */ - uint32_t type; + __u32 type; /** * Index of the IP if there are more IPs of the same * type. Ignored by AMDGPU_INFO_HW_IP_COUNT. */ - uint32_t ip_instance; + __u32 ip_instance; } query_hw_ip; struct { - uint32_t dword_offset; + __u32 dword_offset; /** number of registers to read */ - uint32_t count; - uint32_t instance; + __u32 count; + __u32 instance; /** For future use, no flags defined so far */ - uint32_t flags; + __u32 flags; } read_mmr_reg; struct drm_amdgpu_query_fw query_fw; + + struct { + __u32 type; + __u32 offset; + } vbios_info; + + struct { + __u32 type; + } sensor_info; }; }; struct drm_amdgpu_info_gds { /** GDS GFX partition size */ - uint32_t gds_gfx_partition_size; + __u32 gds_gfx_partition_size; /** GDS compute partition size */ - uint32_t compute_partition_size; + __u32 compute_partition_size; /** total GDS memory size */ - uint32_t gds_total_size; + __u32 gds_total_size; /** GWS size per GFX partition */ - uint32_t gws_per_gfx_partition; + __u32 gws_per_gfx_partition; /** GSW size per compute partition */ - uint32_t gws_per_compute_partition; + __u32 gws_per_compute_partition; /** OA size per GFX partition */ - uint32_t oa_per_gfx_partition; + __u32 oa_per_gfx_partition; /** OA size per compute partition */ - uint32_t oa_per_compute_partition; - uint32_t _pad; + __u32 oa_per_compute_partition; + __u32 _pad; }; struct drm_amdgpu_info_vram_gtt { - uint64_t vram_size; - uint64_t vram_cpu_accessible_size; - uint64_t gtt_size; + __u64 vram_size; + __u64 vram_cpu_accessible_size; + __u64 gtt_size; +}; + +struct drm_amdgpu_heap_info { + /** max. physical memory */ + __u64 total_heap_size; + + /** Theoretical max. available memory in the given heap */ + __u64 usable_heap_size; + + /** + * Number of bytes allocated in the heap. This includes all processes + * and private allocations in the kernel. It changes when new buffers + * are allocated, freed, and moved. It cannot be larger than + * heap_size. + */ + __u64 heap_usage; + + /** + * Theoretical possible max. size of buffer which + * could be allocated in the given heap + */ + __u64 max_allocation; +}; + +struct drm_amdgpu_memory_info { + struct drm_amdgpu_heap_info vram; + struct drm_amdgpu_heap_info cpu_accessible_vram; + struct drm_amdgpu_heap_info gtt; }; struct drm_amdgpu_info_firmware { - uint32_t ver; - uint32_t feature; + __u32 ver; + __u32 feature; }; #define AMDGPU_VRAM_TYPE_UNKNOWN 0 @@ -584,71 +714,108 @@ struct drm_amdgpu_info_firmware { struct drm_amdgpu_info_device { /** PCI Device ID */ - uint32_t device_id; + __u32 device_id; /** Internal chip revision: A0, A1, etc.) */ - uint32_t chip_rev; - uint32_t external_rev; + __u32 chip_rev; + __u32 external_rev; /** Revision id in PCI Config space */ - uint32_t pci_rev; - uint32_t family; - uint32_t num_shader_engines; - uint32_t num_shader_arrays_per_engine; + __u32 pci_rev; + __u32 family; + __u32 num_shader_engines; + __u32 num_shader_arrays_per_engine; /* in KHz */ - uint32_t gpu_counter_freq; - uint64_t max_engine_clock; - uint64_t max_memory_clock; + __u32 gpu_counter_freq; + __u64 max_engine_clock; + __u64 max_memory_clock; /* cu information */ - uint32_t cu_active_number; - uint32_t cu_ao_mask; - uint32_t cu_bitmap[4][4]; + __u32 cu_active_number; + __u32 cu_ao_mask; + __u32 cu_bitmap[4][4]; /** Render backend pipe mask. One render backend is CB+DB. 
*/ - uint32_t enabled_rb_pipes_mask; - uint32_t num_rb_pipes; - uint32_t num_hw_gfx_contexts; - uint32_t _pad; - uint64_t ids_flags; + __u32 enabled_rb_pipes_mask; + __u32 num_rb_pipes; + __u32 num_hw_gfx_contexts; + __u32 _pad; + __u64 ids_flags; /** Starting virtual address for UMDs. */ - uint64_t virtual_address_offset; + __u64 virtual_address_offset; /** The maximum virtual address */ - uint64_t virtual_address_max; + __u64 virtual_address_max; /** Required alignment of virtual addresses. */ - uint32_t virtual_address_alignment; + __u32 virtual_address_alignment; /** Page table entry - fragment size */ - uint32_t pte_fragment_size; - uint32_t gart_page_size; + __u32 pte_fragment_size; + __u32 gart_page_size; /** constant engine ram size*/ - uint32_t ce_ram_size; + __u32 ce_ram_size; /** video memory type info*/ - uint32_t vram_type; + __u32 vram_type; /** video memory bit width*/ - uint32_t vram_bit_width; + __u32 vram_bit_width; /* vce harvesting instance */ - uint32_t vce_harvest_config; + __u32 vce_harvest_config; + /* gfx double offchip LDS buffers */ + __u32 gc_double_offchip_lds_buf; + /* NGG Primitive Buffer */ + __u64 prim_buf_gpu_addr; + /* NGG Position Buffer */ + __u64 pos_buf_gpu_addr; + /* NGG Control Sideband */ + __u64 cntl_sb_buf_gpu_addr; + /* NGG Parameter Cache */ + __u64 param_buf_gpu_addr; }; struct drm_amdgpu_info_hw_ip { /** Version of h/w IP */ - uint32_t hw_ip_version_major; - uint32_t hw_ip_version_minor; + __u32 hw_ip_version_major; + __u32 hw_ip_version_minor; /** Capabilities */ - uint64_t capabilities_flags; + __u64 capabilities_flags; /** command buffer address start alignment*/ - uint32_t ib_start_alignment; + __u32 ib_start_alignment; /** command buffer size alignment*/ - uint32_t ib_size_alignment; + __u32 ib_size_alignment; /** Bitmask of available rings. Bit 0 means ring 0, etc. */ - uint32_t available_rings; - uint32_t _pad; + __u32 available_rings; + __u32 _pad; +}; + +struct drm_amdgpu_info_num_handles { + /** Max handles as supported by firmware for UVD */ + __u32 uvd_max_handles; + /** Handles currently in use for UVD */ + __u32 uvd_used_handles; +}; + +#define AMDGPU_VCE_CLOCK_TABLE_ENTRIES 6 + +struct drm_amdgpu_info_vce_clock_table_entry { + /** System clock */ + __u32 sclk; + /** Memory clock */ + __u32 mclk; + /** VCE clock */ + __u32 eclk; + __u32 pad; +}; + +struct drm_amdgpu_info_vce_clock_table { + struct drm_amdgpu_info_vce_clock_table_entry entries[AMDGPU_VCE_CLOCK_TABLE_ENTRIES]; + __u32 num_valid_entries; + __u32 pad; }; /* * Supported GPU families */ #define AMDGPU_FAMILY_UNKNOWN 0 +#define AMDGPU_FAMILY_SI 110 /* Hainan, Oland, Verde, Pitcairn, Tahiti */ #define AMDGPU_FAMILY_CI 120 /* Bonaire, Hawaii */ #define AMDGPU_FAMILY_KV 125 /* Kaveri, Kabini, Mullins */ #define AMDGPU_FAMILY_VI 130 /* Iceland, Tonga */ #define AMDGPU_FAMILY_CZ 135 /* Carrizo, Stoney */ +#define AMDGPU_FAMILY_AI 141 /* Vega10 */ #if defined(__cplusplus) }
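A sketch of feeding the WAIT_FENCES ioctl introduced above (illustrative only; field meanings follow the struct comments, and the actual ioctl call is left out):

	#include <stdint.h>
	#include <string.h>
	#include "amdgpu_drm.h"

	static void fill_wait_fences(union drm_amdgpu_wait_fences *args,
				     struct drm_amdgpu_fence *fences,
				     uint32_t count, uint64_t timeout_ns)
	{
		memset(args, 0, sizeof(*args));
		args->in.fences = (uintptr_t)fences; /* user pointer as __u64 */
		args->in.fence_count = count;
		args->in.wait_all = 1;               /* 0: return on first signal */
		args->in.timeout_ns = timeout_ns;
		/* Issue with DRM_IOCTL_AMDGPU_WAIT_FENCES; on return, out.status
		 * holds the wait result and, when wait_all == 0,
		 * out.first_signaled identifies the fence that signalled. */
	}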
diff --git a/lib/libdrm/include/drm/drm_mode.h b/lib/libdrm/include/drm/drm_mode.h index df0e3504c..70571af60 100644 --- a/lib/libdrm/include/drm/drm_mode.h +++ b/lib/libdrm/include/drm/drm_mode.h @@ -47,7 +47,15 @@ extern "C" { #define DRM_MODE_TYPE_DRIVER (1<<6) /* Video mode flags */ -/* bit compatible with the xorg definitions. */ +/* bit compatible with the xrandr RR_ definitions (bits 0-13) + * + * ABI warning: Existing userspace really expects + * the mode flags to match the xrandr definitions. Any + * changes that don't match the xrandr definitions will + * likely need a new client cap or some other mechanism + * to avoid breaking existing userspace. This includes + * allocating new flags in the previously unused bits! + */ #define DRM_MODE_FLAG_PHSYNC (1<<0) #define DRM_MODE_FLAG_NHSYNC (1<<1) #define DRM_MODE_FLAG_PVSYNC (1<<2) @@ -107,6 +115,10 @@ extern "C" { #define DRM_MODE_DIRTY_ON 1 #define DRM_MODE_DIRTY_ANNOTATE 2 +/* Link Status options */ +#define DRM_MODE_LINK_STATUS_GOOD 0 +#define DRM_MODE_LINK_STATUS_BAD 1 + struct drm_mode_modeinfo { __u32 clock; __u16 hdisplay; @@ -220,14 +232,16 @@ struct drm_mode_get_encoder { /* This is for connectors with multiple signal types. */ /* Try to match DRM_MODE_CONNECTOR_X as closely as possible. */ -#define DRM_MODE_SUBCONNECTOR_Automatic 0 -#define DRM_MODE_SUBCONNECTOR_Unknown 0 -#define DRM_MODE_SUBCONNECTOR_DVID 3 -#define DRM_MODE_SUBCONNECTOR_DVIA 4 -#define DRM_MODE_SUBCONNECTOR_Composite 5 -#define DRM_MODE_SUBCONNECTOR_SVIDEO 6 -#define DRM_MODE_SUBCONNECTOR_Component 8 -#define DRM_MODE_SUBCONNECTOR_SCART 9 +enum drm_mode_subconnector { + DRM_MODE_SUBCONNECTOR_Automatic = 0, + DRM_MODE_SUBCONNECTOR_Unknown = 0, + DRM_MODE_SUBCONNECTOR_DVID = 3, + DRM_MODE_SUBCONNECTOR_DVIA = 4, + DRM_MODE_SUBCONNECTOR_Composite = 5, + DRM_MODE_SUBCONNECTOR_SVIDEO = 6, + DRM_MODE_SUBCONNECTOR_Component = 8, + DRM_MODE_SUBCONNECTOR_SCART = 9, +}; #define DRM_MODE_CONNECTOR_Unknown 0 #define DRM_MODE_CONNECTOR_VGA 1 @@ -392,17 +406,20 @@ struct drm_mode_fb_cmd2 { * offsets[1]. Note that offsets[0] will generally * be 0 (but this is not required). * - * To accommodate tiled, compressed, etc formats, a per-plane + * To accommodate tiled, compressed, etc formats, a * modifier can be specified. The default value of zero * indicates "native" format as specified by the fourcc. - * Vendor specific modifier token. This allows, for example, - * different tiling/swizzling pattern on different planes. - * See discussion above of DRM_FORMAT_MOD_xxx. + * Vendor specific modifier token. Note that even though + * it looks like we have a modifier per-plane, we in fact + * do not. The modifier for each plane must be identical. + * Thus all combinations of different data layouts for + * multi plane formats must be enumerated as separate + * modifiers. */ __u32 handles[4]; __u32 pitches[4]; /* pitch for each plane */ __u32 offsets[4]; /* offset of each plane */ - __u64 modifier[4]; /* ie, tiling, compressed (per plane) */ + __u64 modifier[4]; /* ie, tiling, compress */ }; #define DRM_MODE_FB_DIRTY_ANNOTATE_COPY 0x01 diff --git a/lib/libdrm/man/drm-kms.xml b/lib/libdrm/man/drm-kms.xml index ae38dc8d2..eb04c263a 100644 --- a/lib/libdrm/man/drm-kms.xml +++ b/lib/libdrm/man/drm-kms.xml @@ -309,8 +309,8 @@ static int modeset_find_crtc(int fd, drmModeRes *res, drmModeConnector *conn) Reporting Bugs Bugs in this manual should be reported to - http://bugs.freedesktop.org under the "Mesa" product, with "Other" or - "libdrm" as the component. + https://bugs.freedesktop.org/enter_bug.cgi?product=DRI&amp;component=libdrm + under the "DRI" product, component "libdrm" diff --git a/lib/libdrm/man/drm-memory.xml b/lib/libdrm/man/drm-memory.xml index 6b4f0759b..3aa7cf259 100644 --- a/lib/libdrm/man/drm-memory.xml +++ b/lib/libdrm/man/drm-memory.xml @@ -410,8 +410,8 @@ memset(map, 0, creq.size); Reporting Bugs Bugs in this manual should be reported to - http://bugs.freedesktop.org under the "Mesa" product, with "Other" or - "libdrm" as the component.
+ https://bugs.freedesktop.org/enter_bug.cgi?product=DRI&amp;component=libdrm + under the "DRI" product, component "libdrm" diff --git a/lib/libdrm/man/drm.xml b/lib/libdrm/man/drm.xml index 5a49fe13e..1f5596694 100644 --- a/lib/libdrm/man/drm.xml +++ b/lib/libdrm/man/drm.xml @@ -50,7 +50,7 @@ In earlier days, the kernel framework was solely used to provide raw hardware access to priviledged user-space processes which implement - all the hardware abstraction layers. But more and more tasks where + all the hardware abstraction layers. But more and more tasks were moved into the kernel. All these interfaces are based on ioctl2 commands on the DRM character device. The libdrm @@ -119,8 +119,8 @@ Reporting Bugs Bugs in this manual should be reported to - http://bugs.freedesktop.org under the "Mesa" product, with "Other" or - "libdrm" as the component. + https://bugs.freedesktop.org/enter_bug.cgi?product=DRI&amp;component=libdrm + under the "DRI" product, component "libdrm" diff --git a/lib/libdrm/man/drmAvailable.xml b/lib/libdrm/man/drmAvailable.xml index 55bef94af..1e5d7873b 100644 --- a/lib/libdrm/man/drmAvailable.xml +++ b/lib/libdrm/man/drmAvailable.xml @@ -61,8 +61,8 @@ Reporting Bugs Bugs in this function should be reported to - http://bugs.freedesktop.org under the "Mesa" product, with "Other" or - "libdrm" as the component. + https://bugs.freedesktop.org/enter_bug.cgi?product=DRI&amp;component=libdrm + under the "DRI" product, component "libdrm" diff --git a/lib/libdrm/man/drmHandleEvent.xml b/lib/libdrm/man/drmHandleEvent.xml index b1006e514..833044283 100644 --- a/lib/libdrm/man/drmHandleEvent.xml +++ b/lib/libdrm/man/drmHandleEvent.xml @@ -86,8 +86,8 @@ typedef struct _drmEventContext { Reporting Bugs Bugs in this function should be reported to - http://bugs.freedesktop.org under the "Mesa" product, with "Other" or - "libdrm" as the component. + https://bugs.freedesktop.org/enter_bug.cgi?product=DRI&amp;component=libdrm + under the "DRI" product, component "libdrm" diff --git a/lib/libdrm/man/drmModeGetResources.xml b/lib/libdrm/man/drmModeGetResources.xml index 2f5e8c2c5..0ab6a68b1 100644 --- a/lib/libdrm/man/drmModeGetResources.xml +++ b/lib/libdrm/man/drmModeGetResources.xml @@ -116,8 +116,8 @@ typedef struct _drmModeRes { Reporting Bugs Bugs in this function should be reported to - http://bugs.freedesktop.org under the "Mesa" product, with "Other" or - "libdrm" as the component. + https://bugs.freedesktop.org/enter_bug.cgi?product=DRI&amp;component=libdrm + under the "DRI" product, component "libdrm" diff --git a/lib/libdrm/tegra/tegra-symbol-check b/lib/libdrm/tegra/tegra-symbol-check index 402083112..420469f41 100755 --- a/lib/libdrm/tegra/tegra-symbol-check +++ b/lib/libdrm/tegra/tegra-symbol-check @@ -1,11 +1,14 @@ #!/bin/bash -# The following symbols (past the first five) are taken from the public headers. -# A list of the latter should be available Makefile.sources/LIBDRM_FREEDRENO_H_FILES +# The following symbols (past the first nine) are taken from tegra.h.
FUNCS=$(nm -D --format=bsd --defined-only ${1-.libs/libdrm_tegra.so} | awk '{print $3}'| while read func; do ( grep -q "^$func$" || echo $func ) <> 32; - pm4[i++] = sdma_write_length; + if (gpu_info.family_id >= AMDGPU_FAMILY_AI) + pm4[i++] = sdma_write_length - 1; + else + pm4[i++] = sdma_write_length; while(j++ < sdma_write_length) pm4[i++] = 0xdeadbeaf; } else if ((ip_type == AMDGPU_HW_IP_GFX) || @@ -904,12 +911,16 @@ static void amdgpu_command_submission_const_fill_helper(unsigned ip_type) uint32_t *pm4; struct amdgpu_cs_ib_info *ib_info; struct amdgpu_cs_request *ibs_request; + struct amdgpu_gpu_info gpu_info = {0}; uint64_t bo_mc; volatile uint32_t *bo_cpu; int i, j, r, loop; uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; amdgpu_va_handle va_handle; + r = amdgpu_query_gpu_info(device_handle, &gpu_info); + CU_ASSERT_EQUAL(r, 0); + pm4 = calloc(pm4_dw, sizeof(*pm4)); CU_ASSERT_NOT_EQUAL(pm4, NULL); @@ -949,7 +960,10 @@ static void amdgpu_command_submission_const_fill_helper(unsigned ip_type) pm4[i++] = 0xffffffff & bo_mc; pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; pm4[i++] = 0xdeadbeaf; - pm4[i++] = sdma_write_length; + if (gpu_info.family_id >= AMDGPU_FAMILY_AI) + pm4[i++] = sdma_write_length - 1; + else + pm4[i++] = sdma_write_length; } else if ((ip_type == AMDGPU_HW_IP_GFX) || (ip_type == AMDGPU_HW_IP_COMPUTE)) { pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); @@ -1007,12 +1021,16 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type) uint32_t *pm4; struct amdgpu_cs_ib_info *ib_info; struct amdgpu_cs_request *ibs_request; + struct amdgpu_gpu_info gpu_info = {0}; uint64_t bo1_mc, bo2_mc; volatile unsigned char *bo1_cpu, *bo2_cpu; int i, j, r, loop1, loop2; uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; amdgpu_va_handle bo1_va_handle, bo2_va_handle; + r = amdgpu_query_gpu_info(device_handle, &gpu_info); + CU_ASSERT_EQUAL(r, 0); + pm4 = calloc(pm4_dw, sizeof(*pm4)); CU_ASSERT_NOT_EQUAL(pm4, NULL); @@ -1064,7 +1082,10 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type) i = j = 0; if (ip_type == AMDGPU_HW_IP_DMA) { pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); - pm4[i++] = sdma_write_length; + if (gpu_info.family_id >= AMDGPU_FAMILY_AI) + pm4[i++] = sdma_write_length - 1; + else + pm4[i++] = sdma_write_length; pm4[i++] = 0; pm4[i++] = 0xffffffff & bo1_mc; pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; diff --git a/lib/libdrm/tests/amdgpu/cs_tests.c b/lib/libdrm/tests/amdgpu/cs_tests.c index 82c55aa8b..342815dea 100644 --- a/lib/libdrm/tests/amdgpu/cs_tests.c +++ b/lib/libdrm/tests/amdgpu/cs_tests.c @@ -175,11 +175,11 @@ static int submit(unsigned ndw, unsigned ip) static void uvd_cmd(uint64_t addr, unsigned cmd, int *idx) { - ib_cpu[(*idx)++] = 0x3BC4; + ib_cpu[(*idx)++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC4 : 0x81C4; ib_cpu[(*idx)++] = addr; - ib_cpu[(*idx)++] = 0x3BC5; + ib_cpu[(*idx)++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC5 : 0x81C5; ib_cpu[(*idx)++] = addr >> 32; - ib_cpu[(*idx)++] = 0x3BC3; + ib_cpu[(*idx)++] = (family_id < AMDGPU_FAMILY_AI) ? 
0x3BC3 : 0x81C3; ib_cpu[(*idx)++] = cmd << 1; } @@ -211,10 +211,13 @@ static void amdgpu_cs_uvd_create(void) CU_ASSERT_EQUAL(r, 0); memcpy(msg, uvd_create_msg, sizeof(uvd_create_msg)); + if (family_id >= AMDGPU_FAMILY_VI) { ((uint8_t*)msg)[0x10] = 7; - /* chip polaris 10/11 */ - if (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A) { + /* chip beyond polaris 10/11 */ + if ((family_id == AMDGPU_FAMILY_AI) || + (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A || + chip_id == chip_rev+0x64)) { /* dpb size */ ((uint8_t*)msg)[0x28] = 0x00; ((uint8_t*)msg)[0x29] = 0x94; @@ -287,13 +290,16 @@ static void amdgpu_cs_uvd_decode(void) CU_ASSERT_EQUAL(r, 0); memcpy(ptr, uvd_decode_msg, sizeof(uvd_create_msg)); + if (family_id >= AMDGPU_FAMILY_VI) { ptr[0x10] = 7; ptr[0x98] = 0x00; ptr[0x99] = 0x02; - /* chip polaris10/11 */ - if (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A) { - /*dpb size */ + /* chip beyond polaris10/11 */ + if ((family_id == AMDGPU_FAMILY_AI) || + (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A || + chip_id == chip_rev+0x64)) { + /* dpb size */ ptr[0x24] = 0x00; ptr[0x25] = 0x94; ptr[0x26] = 0x6B; @@ -335,9 +341,12 @@ static void amdgpu_cs_uvd_decode(void) bs_addr = fb_addr + 4*1024; dpb_addr = ALIGN(bs_addr + sizeof(uvd_bitstream), 4*1024); - if ((family_id >= AMDGPU_FAMILY_VI) && - (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) { - ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024); + if (family_id >= AMDGPU_FAMILY_VI) { + if ((family_id == AMDGPU_FAMILY_AI) || + (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A || + chip_id == chip_rev+0x64)) { + ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024); + } } dt_addr = ALIGN(dpb_addr + dpb_size, 4*1024); @@ -348,12 +357,16 @@ static void amdgpu_cs_uvd_decode(void) uvd_cmd(dt_addr, 0x2, &i); uvd_cmd(fb_addr, 0x3, &i); uvd_cmd(bs_addr, 0x100, &i); + if (family_id >= AMDGPU_FAMILY_VI) { uvd_cmd(it_addr, 0x204, &i); - if (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A) + if ((family_id == AMDGPU_FAMILY_AI) || + (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A || + chip_id == chip_rev+0x64)) uvd_cmd(ctx_addr, 0x206, &i); -} - ib_cpu[i++] = 0x3BC6; + } + + ib_cpu[i++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC6 : 0x81C6; ib_cpu[i++] = 0x1; for (; i % 16; ++i) ib_cpu[i] = 0x80000000; diff --git a/lib/libdrm/tests/amdgpu/vce_tests.c b/lib/libdrm/tests/amdgpu/vce_tests.c index de63aa152..b03807b26 100644 --- a/lib/libdrm/tests/amdgpu/vce_tests.c +++ b/lib/libdrm/tests/amdgpu/vce_tests.c @@ -234,6 +234,7 @@ static void free_resource(struct amdgpu_vce_bo *vce_bo) static void amdgpu_cs_vce_create(void) { + unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16; int len, r; enc.width = vce_create[6]; @@ -250,6 +251,8 @@ static void amdgpu_cs_vce_create(void) memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo)); len += sizeof(vce_taskinfo) / 4; memcpy((ib_cpu + len), vce_create, sizeof(vce_create)); + ib_cpu[len + 8] = ALIGN(enc.width, align); + ib_cpu[len + 9] = ALIGN(enc.width, align); len += sizeof(vce_create) / 4; memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback)); ib_cpu[len + 2] = enc.fb[0].addr >> 32; @@ -291,10 +294,12 @@ static void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc) { uint64_t luma_offset, chroma_offset; - int len = 0, r; + unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 
+	unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16);
+	int len = 0, i, r;
 
 	luma_offset = enc->vbuf.addr;
-	chroma_offset = luma_offset + enc->width * enc->height;
+	chroma_offset = luma_offset + luma_size;
 
 	memcpy((ib_cpu + len), vce_session, sizeof(vce_session));
 	len += sizeof(vce_session) / 4;
@@ -309,6 +314,10 @@ static void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc)
 	ib_cpu[len + 3] = enc->cpb.addr;
 	len += sizeof(vce_context_buffer) / 4;
 	memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer));
+	for (i = 0; i < 8; ++i)
+		ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2);
+	for (i = 0; i < 8; ++i)
+		ib_cpu[len + 10 + i] = luma_size * 1.5;
 	len += sizeof(vce_aux_buffer) / 4;
 	memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
 	ib_cpu[len + 2] = enc->fb[0].addr >> 32;
@@ -319,8 +328,10 @@ static void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc)
 	ib_cpu[len + 10] = luma_offset;
 	ib_cpu[len + 11] = chroma_offset >> 32;
 	ib_cpu[len + 12] = chroma_offset;
-	ib_cpu[len + 73] = 0x7800;
-	ib_cpu[len + 74] = 0x7800 + 0x5000;
+	ib_cpu[len + 14] = ALIGN(enc->width, align);
+	ib_cpu[len + 15] = ALIGN(enc->width, align);
+	ib_cpu[len + 73] = luma_size * 1.5;
+	ib_cpu[len + 74] = luma_size * 2.5;
 	len += sizeof(vce_encode) / 4;
 	enc->ib_len = len;
 	if (!enc->two_instance) {
@@ -332,11 +343,13 @@ static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc)
 {
 	uint64_t luma_offset, chroma_offset;
-	int len, r;
+	int len, i, r;
+	unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
+	unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16);
 
 	len = (enc->two_instance) ? enc->ib_len : 0;
 	luma_offset = enc->vbuf.addr;
-	chroma_offset = luma_offset + enc->width * enc->height;
+	chroma_offset = luma_offset + luma_size;
 
 	if (!enc->two_instance) {
 		memcpy((ib_cpu + len), vce_session, sizeof(vce_session));
@@ -353,6 +366,10 @@ static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc)
 	ib_cpu[len + 3] = enc->cpb.addr;
 	len += sizeof(vce_context_buffer) / 4;
 	memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer));
+	for (i = 0; i < 8; ++i)
+		ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2);
+	for (i = 0; i < 8; ++i)
+		ib_cpu[len + 10 + i] = luma_size * 1.5;
 	len += sizeof(vce_aux_buffer) / 4;
 	memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
 	ib_cpu[len + 2] = enc->fb[1].addr >> 32;
@@ -364,15 +381,17 @@ static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc)
 	ib_cpu[len + 10] = luma_offset;
 	ib_cpu[len + 11] = chroma_offset >> 32;
 	ib_cpu[len + 12] = chroma_offset;
+	ib_cpu[len + 14] = ALIGN(enc->width, align);
+	ib_cpu[len + 15] = ALIGN(enc->width, align);
 	ib_cpu[len + 18] = 0;
 	ib_cpu[len + 19] = 0;
 	ib_cpu[len + 56] = 3;
 	ib_cpu[len + 57] = 0;
 	ib_cpu[len + 58] = 0;
-	ib_cpu[len + 59] = 0x7800;
-	ib_cpu[len + 60] = 0x7800 + 0x5000;
+	ib_cpu[len + 59] = luma_size * 1.5;
+	ib_cpu[len + 60] = luma_size * 2.5;
 	ib_cpu[len + 73] = 0;
-	ib_cpu[len + 74] = 0x5000;
+	ib_cpu[len + 74] = luma_size;
 	ib_cpu[len + 81] = 1;
 	ib_cpu[len + 82] = 1;
 	len += sizeof(vce_encode) / 4;
@@ -408,9 +427,10 @@ static void check_result(struct amdgpu_vce_encode *enc)
 static void amdgpu_cs_vce_encode(void)
 {
 	uint32_t vbuf_size, bs_size = 0x154000, cpb_size;
-	int r;
+	unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
+	int i, r;
 
-	vbuf_size = enc.width * enc.height * 1.5;
+	vbuf_size = ALIGN(enc.width, align) * ALIGN(enc.height, 16) * 1.5;
 	cpb_size = vbuf_size * 10;
 	num_resources = 0;
 	alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT);
@@ -429,7 +449,17 @@ static void amdgpu_cs_vce_encode(void)
 	r = amdgpu_bo_cpu_map(enc.vbuf.handle, (void **)&enc.vbuf.ptr);
 	CU_ASSERT_EQUAL(r, 0);
 
-	memcpy(enc.vbuf.ptr, frame, sizeof(frame));
+
+	memset(enc.vbuf.ptr, 0, vbuf_size);
+	for (i = 0; i < enc.height; ++i) {
+		memcpy(enc.vbuf.ptr, (frame + i * enc.width), enc.width);
+		enc.vbuf.ptr += ALIGN(enc.width, align);
+	}
+	for (i = 0; i < enc.height / 2; ++i) {
+		memcpy(enc.vbuf.ptr, ((frame + enc.height * enc.width) + i * enc.width), enc.width);
+		enc.vbuf.ptr += ALIGN(enc.width, align);
+	}
+
 	r = amdgpu_bo_cpu_unmap(enc.vbuf.handle);
 	CU_ASSERT_EQUAL(r, 0);
 
diff --git a/lib/libdrm/tests/etnaviv/Makefile.am b/lib/libdrm/tests/etnaviv/Makefile.am
index 06318643e..226baee28 100644
--- a/lib/libdrm/tests/etnaviv/Makefile.am
+++ b/lib/libdrm/tests/etnaviv/Makefile.am
@@ -28,6 +28,7 @@ etnaviv_2d_test_SOURCES = \
 	write_bmp.h
 
 etnaviv_cmd_stream_test_LDADD = \
+	$(top_builddir)/libdrm.la \
 	$(top_builddir)/etnaviv/libdrm_etnaviv.la
 
 etnaviv_cmd_stream_test_SOURCES = \
diff --git a/lib/libdrm/tests/exynos/exynos_fimg2d_test.c b/lib/libdrm/tests/exynos/exynos_fimg2d_test.c
index 797fb6eb2..ab1028e8b 100644
--- a/lib/libdrm/tests/exynos/exynos_fimg2d_test.c
+++ b/lib/libdrm/tests/exynos/exynos_fimg2d_test.c
@@ -59,7 +59,6 @@ static void connector_find_mode(int fd, struct connector *c,
 		if (!connector) {
 			fprintf(stderr, "could not get connector %i: %s\n",
 				resources->connectors[i], strerror(errno));
-			drmModeFreeConnector(connector);
 			continue;
 		}
@@ -98,7 +97,6 @@ static void connector_find_mode(int fd, struct connector *c,
 		if (!c->encoder) {
 			fprintf(stderr, "could not get encoder %i: %s\n",
 				resources->encoders[i], strerror(errno));
-			drmModeFreeEncoder(c->encoder);
 			continue;
 		}
@@ -264,7 +262,8 @@ static int g2d_copy_test(struct exynos_device *dev, struct exynos_bo *src,
 	userptr = (unsigned long)malloc(size);
 	if (!userptr) {
 		fprintf(stderr, "failed to allocate userptr.\n");
-		return -EFAULT;
+		ret = -EFAULT;
+		goto fail;
 	}
 
 	src_img.user_ptr[0].userptr = userptr;
@@ -469,7 +468,8 @@ static int g2d_copy_with_scale_test(struct exynos_device *dev,
 	userptr = (unsigned long)malloc(size);
 	if (!userptr) {
 		fprintf(stderr, "failed to allocate userptr.\n");
-		return -EFAULT;
+		ret = -EFAULT;
+		goto fail;
 	}
 
 	src_img.user_ptr[0].userptr = userptr;
@@ -520,9 +520,10 @@ err_free_userptr:
 fail:
 	g2d_fini(ctx);
 
-	return 0;
+	return ret;
 }
 
+#if EXYNOS_G2D_USERPTR_TEST
 static int g2d_blend_test(struct exynos_device *dev,
 			  struct exynos_bo *src,
 			  struct exynos_bo *dst,
@@ -557,7 +558,8 @@ static int g2d_blend_test(struct exynos_device *dev,
 	userptr = (unsigned long)malloc(size);
 	if (!userptr) {
 		fprintf(stderr, "failed to allocate userptr.\n");
-		return -EFAULT;
+		ret = -EFAULT;
+		goto fail;
 	}
 
 	src_img.user_ptr[0].userptr = userptr;
@@ -619,8 +621,9 @@ err_free_userptr:
 fail:
 	g2d_fini(ctx);
 
-	return 0;
+	return ret;
 }
+#endif
 
 static int g2d_checkerboard_test(struct exynos_device *dev,
 				 struct exynos_bo *src,
@@ -645,8 +648,8 @@ static int g2d_checkerboard_test(struct exynos_device *dev,
 	dst_y = 0;
 
 	checkerboard = create_checkerboard_pattern(screen_width / 32,
 			screen_height / 32, 32);
-	if (checkerboard == NULL) {
-		ret = -1;
+	if (!checkerboard) {
+		ret = -EFAULT;
 		goto fail;
 	}
@@ -755,8 +758,8 @@ int main(int argc, char **argv)
 	dev = exynos_device_create(fd);
 	if (!dev) {
-		drmClose(dev->fd);
-		return -EFAULT;
+		ret = -EFAULT;
+		goto err_drm_close;
 	}
 
 	resources = drmModeGetResources(dev->fd);
@@ -764,7 +767,7 @@ int main(int argc, char **argv)
 		fprintf(stderr, "drmModeGetResources failed: %s\n",
 			strerror(errno));
 		ret = -EFAULT;
-		goto err_drm_close;
+		goto err_dev_destory;
 	}
 
 	connector_find_mode(dev->fd, &con, resources);
@@ -773,7 +776,7 @@ int main(int argc, char **argv)
 	if (!con.mode) {
 		fprintf(stderr, "failed to find usable connector\n");
 		ret = -EFAULT;
-		goto err_drm_close;
+		goto err_dev_destory;
 	}
 
 	screen_width = con.mode->hdisplay;
@@ -782,7 +785,7 @@ int main(int argc, char **argv)
 	if (screen_width == 0 || screen_height == 0) {
 		fprintf(stderr, "failed to find sane resolution on connector\n");
 		ret = -EFAULT;
-		goto err_drm_close;
+		goto err_dev_destory;
 	}
 
 	printf("screen width = %d, screen height = %d\n", screen_width,
@@ -791,7 +794,7 @@ int main(int argc, char **argv)
 	bo = exynos_create_buffer(dev, screen_width * screen_height * 4, 0);
 	if (!bo) {
 		ret = -EFAULT;
-		goto err_drm_close;
+		goto err_dev_destory;
 	}
 
 	handles[0] = bo->handle;
@@ -864,7 +867,7 @@ int main(int argc, char **argv)
 	 *
 	 * Disable the test for now, until the kernel code has been sanitized.
 	 */
-#if 0
+#if EXYNOS_G2D_USERPTR_TEST
 	ret = g2d_blend_test(dev, src, bo, G2D_IMGBUF_USERPTR);
 	if (ret < 0)
 		fprintf(stderr, "failed to test blend operation.\n");
@@ -882,9 +885,11 @@ err_rm_fb:
 err_destroy_buffer:
 	exynos_destroy_buffer(bo);
 
-err_drm_close:
-	drmClose(dev->fd);
+err_dev_destory:
 	exynos_device_destroy(dev);
 
-	return 0;
+err_drm_close:
+	drmClose(fd);
+
+	return ret;
 }