From 9838f6edad3bea18170fbc888c10d830a88b4b59 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Sun, 23 Jun 2024 16:44:36 +0000 Subject: [PATCH] Sun Jun 23 16:43:41 UTC 2024 Kernel update --- bsp/drivers/gpu/panfrost/panfrost_devfreq.c | 6 +- bsp/drivers/gpu/panfrost/panfrost_device.c | 9 +- bsp/drivers/gpu/panfrost/panfrost_drv.c | 69 +++--- bsp/drivers/gpu/panfrost/panfrost_features.h | 231 +----------------- bsp/drivers/gpu/panfrost/panfrost_gem.c | 18 +- bsp/drivers/gpu/panfrost/panfrost_gem.h | 5 +- .../gpu/panfrost/panfrost_gem_shrinker.c | 2 +- bsp/drivers/gpu/panfrost/panfrost_gpu.c | 169 +++---------- bsp/drivers/gpu/panfrost/panfrost_issues.h | 2 +- bsp/drivers/gpu/panfrost/panfrost_job.c | 56 ++--- bsp/drivers/gpu/panfrost/panfrost_job.h | 5 +- bsp/drivers/gpu/panfrost/panfrost_mmu.c | 63 +++-- bsp/drivers/gpu/panfrost/panfrost_perfcnt.c | 5 +- bsp/drivers/gpu/panfrost/panfrost_regs.h | 4 +- 14 files changed, 173 insertions(+), 471 deletions(-) diff --git a/bsp/drivers/gpu/panfrost/panfrost_devfreq.c b/bsp/drivers/gpu/panfrost/panfrost_devfreq.c index 2527894537..2dacb5e57f 100644 --- a/bsp/drivers/gpu/panfrost/panfrost_devfreq.c +++ b/bsp/drivers/gpu/panfrost/panfrost_devfreq.c @@ -286,10 +286,8 @@ int panfrost_devfreq_init(struct panfrost_device *pfdev) cur_freq = clk_get_rate(pfdev->clock); opp = devfreq_recommended_opp(dev, &cur_freq, 0); - if (IS_ERR(opp)) { - ret = PTR_ERR(opp); - goto err_opp_out; - } + if (IS_ERR(opp)) + return PTR_ERR(opp); panfrost_devfreq_profile.initial_freq = cur_freq; diff --git a/bsp/drivers/gpu/panfrost/panfrost_device.c b/bsp/drivers/gpu/panfrost/panfrost_device.c index 71570f4de9..f7647a0e0e 100644 --- a/bsp/drivers/gpu/panfrost/panfrost_device.c +++ b/bsp/drivers/gpu/panfrost/panfrost_device.c @@ -12,6 +12,7 @@ #include "panfrost_device.h" #include "panfrost_devfreq.h" #include "panfrost_features.h" +#include "panfrost_issues.h" #include "panfrost_gpu.h" #include "panfrost_job.h" #include "panfrost_mmu.h" @@ -472,9 +473,13 @@ const char *panfrost_exception_name(u32 exception_code) bool panfrost_exception_needs_reset(const struct panfrost_device *pfdev, u32 exception_code) { - /* Right now, none of the GPU we support need a reset, but this - * might change. + /* If an occlusion query write causes a bus fault on affected GPUs, + * future fragment jobs may hang. Reset to workaround. */ + if (exception_code == DRM_PANFROST_EXCEPTION_JOB_BUS_FAULT) + return panfrost_has_hw_issue(pfdev, HW_ISSUE_TTRX_3076); + + /* No other GPUs we support need a reset */ return false; } diff --git a/bsp/drivers/gpu/panfrost/panfrost_drv.c b/bsp/drivers/gpu/panfrost/panfrost_drv.c index d9bf5c4cea..6a0a7786f7 100644 --- a/bsp/drivers/gpu/panfrost/panfrost_drv.c +++ b/bsp/drivers/gpu/panfrost/panfrost_drv.c @@ -88,7 +88,6 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, struct panfrost_gem_object *bo; struct drm_panfrost_create_bo *args = data; struct panfrost_gem_mapping *mapping; - int ret; if (!args->size || args->pad || (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP))) @@ -99,29 +98,21 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, !(args->flags & PANFROST_BO_NOEXEC)) return -EINVAL; - bo = panfrost_gem_create(dev, args->size, args->flags); + bo = panfrost_gem_create_with_handle(file, dev, args->size, args->flags, + &args->handle); if (IS_ERR(bo)) return PTR_ERR(bo); - ret = drm_gem_handle_create(file, &bo->base.base, &args->handle); - if (ret) - goto out; - mapping = panfrost_gem_mapping_get(bo, priv); - if (mapping) { - args->offset = mapping->mmnode.start << PAGE_SHIFT; - panfrost_gem_mapping_put(mapping); - } else { - /* This can only happen if the handle from - * drm_gem_handle_create() has already been guessed and freed - * by user space - */ - ret = -EINVAL; + if (!mapping) { + drm_gem_object_put(&bo->base.base); + return -EINVAL; } -out: - drm_gem_object_put(&bo->base.base); - return ret; + args->offset = mapping->mmnode.start << PAGE_SHIFT; + panfrost_gem_mapping_put(mapping); + + return 0; } /** @@ -233,7 +224,7 @@ panfrost_copy_in_sync(struct drm_device *dev, if (ret) goto fail; - ret = drm_gem_fence_array_add(&job->deps, fence); + ret = drm_sched_job_add_dependency(&job->base, fence); if (ret) goto fail; @@ -251,7 +242,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data, struct drm_panfrost_submit *args = data; struct drm_syncobj *sync_out = NULL; struct panfrost_job *job; - int ret = 0; + int ret = 0, slot; if (!args->jc) return -EINVAL; @@ -268,38 +259,47 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data, job = kzalloc(sizeof(*job), GFP_KERNEL); if (!job) { ret = -ENOMEM; - goto fail_out_sync; + goto out_put_syncout; } kref_init(&job->refcount); - xa_init_flags(&job->deps, XA_FLAGS_ALLOC); - job->pfdev = pfdev; job->jc = args->jc; job->requirements = args->requirements; job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev); job->file_priv = file->driver_priv; + slot = panfrost_job_get_slot(job); + + ret = drm_sched_job_init(&job->base, + &job->file_priv->sched_entity[slot], + NULL); + if (ret) + goto out_put_job; + ret = panfrost_copy_in_sync(dev, file, args, job); if (ret) - goto fail_job; + goto out_cleanup_job; ret = panfrost_lookup_bos(dev, file, args, job); if (ret) - goto fail_job; + goto out_cleanup_job; ret = panfrost_job_push(job); if (ret) - goto fail_job; + goto out_cleanup_job; /* Update the return sync object for the job */ if (sync_out) drm_syncobj_replace_fence(sync_out, job->render_done_fence); -fail_job: +out_cleanup_job: + if (ret) + drm_sched_job_cleanup(&job->base); +out_put_job: panfrost_job_put(job); -fail_out_sync: +out_put_syncout: if (sync_out) drm_syncobj_put(sync_out); @@ -322,7 +322,8 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, if (!gem_obj) return -ENOENT; - ret = dma_resv_wait_timeout(gem_obj->resv, true, true, timeout); + ret = dma_resv_wait_timeout(gem_obj->resv, DMA_RESV_USAGE_READ, + true, timeout); if (!ret) ret = timeout ? -ETIMEDOUT : -EBUSY; @@ -439,8 +440,8 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data, #endif if (args->retained) { if (args->madv == PANFROST_MADV_DONTNEED) - list_move_tail(&bo->base.madv_list, - &pfdev->shrinker_list); + list_add_tail(&bo->base.madv_list, + &pfdev->shrinker_list); else if (args->madv == PANFROST_MADV_WILLNEED) list_del_init(&bo->base.madv_list); } @@ -574,7 +575,7 @@ static int panfrost_probe(struct platform_device *pdev) pfdev->coherent = device_get_dma_attr(&pdev->dev) == DEV_DMA_COHERENT; - /* Allocate and initialze the DRM device. */ + /* Allocate and initialize the DRM device. */ ddev = drm_dev_alloc(&panfrost_drm_driver, &pdev->dev); if (IS_ERR(ddev)) return PTR_ERR(ddev); @@ -650,8 +651,8 @@ static const struct panfrost_compatible amlogic_data = { .vendor_quirk = panfrost_gpu_amlogic_quirk, }; -const char * const mediatek_mt8183_supplies[] = { "mali", "sram" }; -const char * const mediatek_mt8183_pm_domains[] = { "core0", "core1", "core2" }; +static const char * const mediatek_mt8183_supplies[] = { "mali", "sram" }; +static const char * const mediatek_mt8183_pm_domains[] = { "core0", "core1", "core2" }; static const struct panfrost_compatible mediatek_mt8183_data = { .num_supplies = ARRAY_SIZE(mediatek_mt8183_supplies), .supply_names = mediatek_mt8183_supplies, diff --git a/bsp/drivers/gpu/panfrost/panfrost_features.h b/bsp/drivers/gpu/panfrost/panfrost_features.h index c145b1322c..7ed0cd3ea2 100644 --- a/bsp/drivers/gpu/panfrost/panfrost_features.h +++ b/bsp/drivers/gpu/panfrost/panfrost_features.h @@ -12,24 +12,6 @@ enum panfrost_hw_feature { HW_FEATURE_JOBCHAIN_DISAMBIGUATION, HW_FEATURE_PWRON_DURING_PWROFF_TRANS, HW_FEATURE_XAFFINITY, - HW_FEATURE_OUT_OF_ORDER_EXEC, - HW_FEATURE_MRT, - HW_FEATURE_BRNDOUT_CC, - HW_FEATURE_INTERPIPE_REG_ALIASING, - HW_FEATURE_LD_ST_TILEBUFFER, - HW_FEATURE_MSAA_16X, - HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - HW_FEATURE_OPTIMIZED_COVERAGE_MASK, - HW_FEATURE_T7XX_PAIRING_RULES, - HW_FEATURE_LD_ST_LEA_TEX, - HW_FEATURE_LINEAR_FILTER_FLOAT, - HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4, - HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS, - HW_FEATURE_TEST4_DATUM_MODE, - HW_FEATURE_NEXT_INSTRUCTION_TYPE, - HW_FEATURE_BRNDOUT_KILL, - HW_FEATURE_WARPING, HW_FEATURE_V4, HW_FEATURE_FLUSH_REDUCTION, HW_FEATURE_PROTECTED_MODE, @@ -38,132 +20,37 @@ enum panfrost_hw_feature { HW_FEATURE_AARCH64_MMU, HW_FEATURE_TLS_HASHING, HW_FEATURE_THREAD_GROUP_SPLIT, + HW_FEATURE_IDVS_GROUP_SIZE, + HW_FEATURE_CLEAN_ONLY_SAFE, HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG, }; #define hw_features_t600 (\ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ BIT_ULL(HW_FEATURE_V4)) -#define hw_features_t620 (\ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ - BIT_ULL(HW_FEATURE_V4)) - -#define hw_features_t720 (\ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_OPTIMIZED_COVERAGE_MASK) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ - BIT_ULL(HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_V4)) +#define hw_features_t620 hw_features_t600 +#define hw_features_t720 hw_features_t600 #define hw_features_t760 (\ BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_MSAA_16X) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT)) -// T860 -#define hw_features_t860 (\ - BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ - BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ - BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_MSAA_16X) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ - BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT)) +#define hw_features_t860 hw_features_t760 -#define hw_features_t880 hw_features_t860 +#define hw_features_t880 hw_features_t760 -#define hw_features_t830 (\ - BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ - BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ - BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ - BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT)) +#define hw_features_t830 hw_features_t760 -#define hw_features_t820 (\ - BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ - BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ - BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ - BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT)) +#define hw_features_t820 hw_features_t760 #define hw_features_g71 (\ BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_MSAA_16X) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ @@ -173,96 +60,29 @@ enum panfrost_hw_feature { BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_MSAA_16X) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ BIT_ULL(HW_FEATURE_COHERENCY_REG)) -#define hw_features_g51 (\ - BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ - BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ - BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_MSAA_16X) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ - BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ - BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ - BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ - BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ - BIT_ULL(HW_FEATURE_COHERENCY_REG)) +#define hw_features_g51 hw_features_g72 #define hw_features_g52 (\ BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_MSAA_16X) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ + BIT_ULL(HW_FEATURE_IDVS_GROUP_SIZE) | \ BIT_ULL(HW_FEATURE_COHERENCY_REG)) #define hw_features_g76 (\ BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_MSAA_16X) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ @@ -270,27 +90,13 @@ enum panfrost_hw_feature { BIT_ULL(HW_FEATURE_COHERENCY_REG) | \ BIT_ULL(HW_FEATURE_AARCH64_MMU) | \ BIT_ULL(HW_FEATURE_TLS_HASHING) | \ + BIT_ULL(HW_FEATURE_IDVS_GROUP_SIZE) | \ BIT_ULL(HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG)) #define hw_features_g31 (\ BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_MSAA_16X) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ @@ -304,21 +110,6 @@ enum panfrost_hw_feature { BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_MSAA_16X) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ diff --git a/bsp/drivers/gpu/panfrost/panfrost_gem.c b/bsp/drivers/gpu/panfrost/panfrost_gem.c index 1affe84b03..0cb5b5de3d 100644 --- a/bsp/drivers/gpu/panfrost/panfrost_gem.c +++ b/bsp/drivers/gpu/panfrost/panfrost_gem.c @@ -249,7 +249,7 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t obj = kzalloc(sizeof(*obj), GFP_KERNEL); if (!obj) - return NULL; + return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&obj->mappings.list); mutex_init(&obj->mappings.lock); @@ -264,8 +264,12 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t } struct panfrost_gem_object * -panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) +panfrost_gem_create_with_handle(struct drm_file *file_priv, + struct drm_device *dev, size_t size, + u32 flags, + uint32_t *handle) { + int ret; struct drm_gem_shmem_object *shmem; struct panfrost_gem_object *bo; @@ -281,6 +285,16 @@ panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) bo->noexec = !!(flags & PANFROST_BO_NOEXEC); bo->is_heap = !!(flags & PANFROST_BO_HEAP); + /* + * Allocate an id of idr table where the obj is registered + * and handle has the id what user can see. + */ + ret = drm_gem_handle_create(file_priv, &shmem->base, handle); + /* drop reference from allocate - handle holds it now. */ + drm_gem_object_put(&shmem->base); + if (ret) + return ERR_PTR(ret); + return bo; } diff --git a/bsp/drivers/gpu/panfrost/panfrost_gem.h b/bsp/drivers/gpu/panfrost/panfrost_gem.h index ad2877eeec..8088d5fd84 100644 --- a/bsp/drivers/gpu/panfrost/panfrost_gem.h +++ b/bsp/drivers/gpu/panfrost/panfrost_gem.h @@ -69,7 +69,10 @@ panfrost_gem_prime_import_sg_table(struct drm_device *dev, struct sg_table *sgt); struct panfrost_gem_object * -panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags); +panfrost_gem_create_with_handle(struct drm_file *file_priv, + struct drm_device *dev, size_t size, + u32 flags, + uint32_t *handle); int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv); void panfrost_gem_close(struct drm_gem_object *obj, diff --git a/bsp/drivers/gpu/panfrost/panfrost_gem_shrinker.c b/bsp/drivers/gpu/panfrost/panfrost_gem_shrinker.c index 62eeffb2aa..df2afdcbad 100644 --- a/bsp/drivers/gpu/panfrost/panfrost_gem_shrinker.c +++ b/bsp/drivers/gpu/panfrost/panfrost_gem_shrinker.c @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2019 Arm Ltd. * * Based on msm_gem_freedreno.c: diff --git a/bsp/drivers/gpu/panfrost/panfrost_gpu.c b/bsp/drivers/gpu/panfrost/panfrost_gpu.c index fd8e449921..9e91f4a541 100644 --- a/bsp/drivers/gpu/panfrost/panfrost_gpu.c +++ b/bsp/drivers/gpu/panfrost/panfrost_gpu.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2020 - 2023 Allwinner Technology Co.,Ltd. All rights reserved. */ /* Copyright 2018 Marty E. Plummer */ /* Copyright 2019 Linaro, Ltd., Rob Herring */ /* Copyright 2019 Collabora ltd. */ @@ -22,13 +23,9 @@ static irqreturn_t panfrost_gpu_irq_handler(int irq, void *data) { struct panfrost_device *pfdev = data; - u32 fault_status, state; + u32 state = gpu_read(pfdev, GPU_INT_STAT); + u32 fault_status = gpu_read(pfdev, GPU_FAULT_STATUS); - if (test_bit(PANFROST_COMP_BIT_GPU, pfdev->is_suspended)) - return IRQ_NONE; - - fault_status = gpu_read(pfdev, GPU_FAULT_STATUS); - state = gpu_read(pfdev, GPU_INT_STAT); if (!state) return IRQ_NONE; @@ -64,39 +61,18 @@ int panfrost_gpu_soft_reset(struct panfrost_device *pfdev) gpu_write(pfdev, GPU_INT_MASK, 0); gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_RESET_COMPLETED); - - clear_bit(PANFROST_COMP_BIT_GPU, pfdev->is_suspended); - gpu_write(pfdev, GPU_CMD, GPU_CMD_SOFT_RESET); + ret = readl_relaxed_poll_timeout(pfdev->iomem + GPU_INT_RAWSTAT, - val, val & GPU_IRQ_RESET_COMPLETED, 10, 10000); + val, val & GPU_IRQ_RESET_COMPLETED, 100, 10000); if (ret) { - dev_err(pfdev->dev, "gpu soft reset timed out, attempting hard reset\n"); - - gpu_write(pfdev, GPU_CMD, GPU_CMD_HARD_RESET); - ret = readl_relaxed_poll_timeout(pfdev->iomem + GPU_INT_RAWSTAT, val, - val & GPU_IRQ_RESET_COMPLETED, 100, 10000); - if (ret) { - dev_err(pfdev->dev, "gpu hard reset timed out\n"); - return ret; - } + dev_err(pfdev->dev, "gpu soft reset timed out\n"); + return ret; } gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_MASK_ALL); - - /* Only enable the interrupts we care about */ - gpu_write(pfdev, GPU_INT_MASK, - GPU_IRQ_MASK_ERROR | - GPU_IRQ_PERFCNT_SAMPLE_COMPLETED | - GPU_IRQ_CLEAN_CACHES_COMPLETED); - - /* - * All in-flight jobs should have released their cycle - * counter references upon reset, but let us make sure - */ - if (drm_WARN_ON(pfdev->ddev, atomic_read(&pfdev->cycle_counter.use_count) != 0)) - atomic_set(&pfdev->cycle_counter.use_count, 0); + gpu_write(pfdev, GPU_INT_MASK, GPU_IRQ_MASK_ALL); return 0; } @@ -229,14 +205,6 @@ static const struct panfrost_model gpu_models[] = { GPU_MODEL(g57, 0x9001, GPU_REV(g57, 0, 0)), - - /* MediaTek MT8192 has a Mali-G57 with a different GPU ID from the - * standard. Arm's driver does not appear to handle this model. - * ChromeOS has a hack downstream for it. Treat it as equivalent to - * standard Mali-G57 for now. - */ - GPU_MODEL(g57, 0x9003, - GPU_REV(g57, 0, 0)), }; static void panfrost_gpu_init_features(struct panfrost_device *pfdev) @@ -346,76 +314,32 @@ static void panfrost_gpu_init_features(struct panfrost_device *pfdev) pfdev->features.shader_present, pfdev->features.l2_present); } -void panfrost_cycle_counter_get(struct panfrost_device *pfdev) -{ - if (atomic_inc_not_zero(&pfdev->cycle_counter.use_count)) - return; - - spin_lock(&pfdev->cycle_counter.lock); - if (atomic_inc_return(&pfdev->cycle_counter.use_count) == 1) - gpu_write(pfdev, GPU_CMD, GPU_CMD_CYCLE_COUNT_START); - spin_unlock(&pfdev->cycle_counter.lock); -} - -void panfrost_cycle_counter_put(struct panfrost_device *pfdev) -{ - if (atomic_add_unless(&pfdev->cycle_counter.use_count, -1, 1)) - return; - - spin_lock(&pfdev->cycle_counter.lock); - if (atomic_dec_return(&pfdev->cycle_counter.use_count) == 0) - gpu_write(pfdev, GPU_CMD, GPU_CMD_CYCLE_COUNT_STOP); - spin_unlock(&pfdev->cycle_counter.lock); -} - -unsigned long long panfrost_cycle_counter_read(struct panfrost_device *pfdev) -{ - u32 hi, lo; - - do { - hi = gpu_read(pfdev, GPU_CYCLE_COUNT_HI); - lo = gpu_read(pfdev, GPU_CYCLE_COUNT_LO); - } while (hi != gpu_read(pfdev, GPU_CYCLE_COUNT_HI)); - - return ((u64)hi << 32) | lo; -} - -static u64 panfrost_get_core_mask(struct panfrost_device *pfdev) -{ - u64 core_mask; - - if (pfdev->features.l2_present == 1) - return U64_MAX; - - /* - * Only support one core group now. - * ~(l2_present - 1) unsets all bits in l2_present except - * the bottom bit. (l2_present - 2) has all the bits in - * the first core group set. AND them together to generate - * a mask of cores in the first core group. - */ - core_mask = ~(pfdev->features.l2_present - 1) & - (pfdev->features.l2_present - 2); - dev_info_once(pfdev->dev, "using only 1st core group (%lu cores from %lu)\n", - hweight64(core_mask), - hweight64(pfdev->features.shader_present)); - - return core_mask; -} - void panfrost_gpu_power_on(struct panfrost_device *pfdev) { int ret; u32 val; - u64 core_mask; + u64 core_mask = U64_MAX; panfrost_gpu_init_quirks(pfdev); - core_mask = panfrost_get_core_mask(pfdev); + if (pfdev->features.l2_present != 1) { + /* + * Only support one core group now. + * ~(l2_present - 1) unsets all bits in l2_present except + * the bottom bit. (l2_present - 2) has all the bits in + * the first core group set. AND them together to generate + * a mask of cores in the first core group. + */ + core_mask = ~(pfdev->features.l2_present - 1) & + (pfdev->features.l2_present - 2); + dev_info_once(pfdev->dev, "using only 1st core group (%lu cores from %lu)\n", + hweight64(core_mask), + hweight64(pfdev->features.shader_present)); + } gpu_write(pfdev, L2_PWRON_LO, pfdev->features.l2_present & core_mask); ret = readl_relaxed_poll_timeout(pfdev->iomem + L2_READY_LO, val, val == (pfdev->features.l2_present & core_mask), - 10, 20000); + 100, 20000); if (ret) dev_err(pfdev->dev, "error powering up gpu L2"); @@ -423,52 +347,27 @@ void panfrost_gpu_power_on(struct panfrost_device *pfdev) pfdev->features.shader_present & core_mask); ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_READY_LO, val, val == (pfdev->features.shader_present & core_mask), - 10, 20000); + 100, 20000); if (ret) dev_err(pfdev->dev, "error powering up gpu shader"); gpu_write(pfdev, TILER_PWRON_LO, pfdev->features.tiler_present); ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_READY_LO, - val, val == pfdev->features.tiler_present, 10, 1000); + val, val == pfdev->features.tiler_present, 100, 1000); if (ret) dev_err(pfdev->dev, "error powering up gpu tiler"); } void panfrost_gpu_power_off(struct panfrost_device *pfdev) { - int ret; - u32 val; - - gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present); - ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO, - val, !val, 1, 2000); - if (ret) - dev_err(pfdev->dev, "shader power transition timeout"); - - gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present); - ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO, - val, !val, 1, 2000); - if (ret) - dev_err(pfdev->dev, "tiler power transition timeout"); - - gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present); - ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO, - val, !val, 0, 2000); - if (ret) - dev_err(pfdev->dev, "l2 power transition timeout"); -} - -void panfrost_gpu_suspend_irq(struct panfrost_device *pfdev) -{ - set_bit(PANFROST_COMP_BIT_GPU, pfdev->is_suspended); - - gpu_write(pfdev, GPU_INT_MASK, 0); - synchronize_irq(pfdev->gpu_irq); + gpu_write(pfdev, TILER_PWROFF_LO, 0); + gpu_write(pfdev, SHADER_PWROFF_LO, 0); + gpu_write(pfdev, L2_PWROFF_LO, 0); } int panfrost_gpu_init(struct panfrost_device *pfdev) { - int err; + int err, irq; err = panfrost_gpu_soft_reset(pfdev); if (err) @@ -483,11 +382,11 @@ int panfrost_gpu_init(struct panfrost_device *pfdev) dma_set_max_seg_size(pfdev->dev, UINT_MAX); - pfdev->gpu_irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "gpu"); - if (pfdev->gpu_irq < 0) - return pfdev->gpu_irq; + irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "gpu"); + if (irq <= 0) + return -ENODEV; - err = devm_request_irq(pfdev->dev, pfdev->gpu_irq, panfrost_gpu_irq_handler, + err = devm_request_irq(pfdev->dev, irq, panfrost_gpu_irq_handler, IRQF_SHARED, KBUILD_MODNAME "-gpu", pfdev); if (err) { dev_err(pfdev->dev, "failed to request gpu irq"); diff --git a/bsp/drivers/gpu/panfrost/panfrost_issues.h b/bsp/drivers/gpu/panfrost/panfrost_issues.h index c763de6781..eb60cb8366 100644 --- a/bsp/drivers/gpu/panfrost/panfrost_issues.h +++ b/bsp/drivers/gpu/panfrost/panfrost_issues.h @@ -265,7 +265,7 @@ enum panfrost_hw_issue { #define hw_issues_g57_r0p0 (\ BIT_ULL(HW_ISSUE_TTRX_3485)) -static inline bool panfrost_has_hw_issue(struct panfrost_device *pfdev, +static inline bool panfrost_has_hw_issue(const struct panfrost_device *pfdev, enum panfrost_hw_issue issue) { return test_bit(issue, pfdev->features.hw_issues); diff --git a/bsp/drivers/gpu/panfrost/panfrost_job.c b/bsp/drivers/gpu/panfrost/panfrost_job.c index f3328f83dd..d5412a47c4 100644 --- a/bsp/drivers/gpu/panfrost/panfrost_job.c +++ b/bsp/drivers/gpu/panfrost/panfrost_job.c @@ -102,7 +102,7 @@ static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, in return &fence->base; } -static int panfrost_job_get_slot(struct panfrost_job *job) +int panfrost_job_get_slot(struct panfrost_job *job) { /* JS0: fragment jobs. * JS1: vertex/tiler jobs @@ -137,8 +137,8 @@ static void panfrost_job_write_affinity(struct panfrost_device *pfdev, */ affinity = pfdev->features.shader_present; - job_write(pfdev, JS_AFFINITY_NEXT_LO(js), affinity & 0xFFFFFFFF); - job_write(pfdev, JS_AFFINITY_NEXT_HI(js), affinity >> 32); + job_write(pfdev, JS_AFFINITY_NEXT_LO(js), lower_32_bits(affinity)); + job_write(pfdev, JS_AFFINITY_NEXT_HI(js), upper_32_bits(affinity)); } static u32 @@ -203,8 +203,8 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js) cfg = panfrost_mmu_as_get(pfdev, job->file_priv->mmu); - job_write(pfdev, JS_HEAD_NEXT_LO(js), jc_head & 0xFFFFFFFF); - job_write(pfdev, JS_HEAD_NEXT_HI(js), jc_head >> 32); + job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc_head)); + job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc_head)); panfrost_job_write_affinity(pfdev, job->requirements, js); @@ -242,13 +242,18 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js) static int panfrost_acquire_object_fences(struct drm_gem_object **bos, int bo_count, - struct xarray *deps) + struct drm_sched_job *job) { int i, ret; for (i = 0; i < bo_count; i++) { + ret = dma_resv_reserve_fences(bos[i]->resv, 1); + if (ret) + return ret; + /* panfrost always uses write mode in its current uapi */ - ret = drm_gem_fence_array_add_implicit(deps, bos[i], true); + ret = drm_sched_job_add_implicit_dependencies(job, bos[i], + true); if (ret) return ret; } @@ -263,35 +268,27 @@ static void panfrost_attach_object_fences(struct drm_gem_object **bos, int i; for (i = 0; i < bo_count; i++) - dma_resv_add_excl_fence(bos[i]->resv, fence); + dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE); } int panfrost_job_push(struct panfrost_job *job) { struct panfrost_device *pfdev = job->pfdev; - int slot = panfrost_job_get_slot(job); - struct drm_sched_entity *entity = &job->file_priv->sched_entity[slot]; struct ww_acquire_ctx acquire_ctx; int ret = 0; - ret = drm_gem_lock_reservations(job->bos, job->bo_count, &acquire_ctx); if (ret) return ret; mutex_lock(&pfdev->sched_lock); - - ret = drm_sched_job_init(&job->base, entity, NULL); - if (ret) { - mutex_unlock(&pfdev->sched_lock); - goto unlock; - } + drm_sched_job_arm(&job->base); job->render_done_fence = dma_fence_get(&job->base.s_fence->finished); ret = panfrost_acquire_object_fences(job->bos, job->bo_count, - &job->deps); + &job->base); if (ret) { mutex_unlock(&pfdev->sched_lock); goto unlock; @@ -299,7 +296,7 @@ int panfrost_job_push(struct panfrost_job *job) kref_get(&job->refcount); /* put by scheduler job completion */ - drm_sched_entity_push_job(&job->base, entity); + drm_sched_entity_push_job(&job->base); mutex_unlock(&pfdev->sched_lock); @@ -316,15 +313,8 @@ static void panfrost_job_cleanup(struct kref *ref) { struct panfrost_job *job = container_of(ref, struct panfrost_job, refcount); - struct dma_fence *fence; - unsigned long index; unsigned int i; - xa_for_each(&job->deps, index, fence) { - dma_fence_put(fence); - } - xa_destroy(&job->deps); - dma_fence_put(job->done_fence); dma_fence_put(job->render_done_fence); @@ -363,17 +353,6 @@ static void panfrost_job_free(struct drm_sched_job *sched_job) panfrost_job_put(job); } -static struct dma_fence *panfrost_job_dependency(struct drm_sched_job *sched_job, - struct drm_sched_entity *s_entity) -{ - struct panfrost_job *job = to_panfrost_job(sched_job); - - if (!xa_empty(&job->deps)) - return xa_erase(&job->deps, job->last_dep++); - - return NULL; -} - static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job) { struct panfrost_job *job = to_panfrost_job(sched_job); @@ -773,7 +752,6 @@ static void panfrost_reset_work(struct work_struct *work) } static const struct drm_sched_backend_ops panfrost_sched_ops = { - .dependency = panfrost_job_dependency, .run_job = panfrost_job_run, .timedout_job = panfrost_job_timedout, .free_job = panfrost_job_free @@ -822,7 +800,7 @@ int panfrost_job_init(struct panfrost_device *pfdev) INIT_WORK(&pfdev->reset.work, panfrost_reset_work); spin_lock_init(&js->job_lock); - js->irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "JOB"); + js->irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job"); if (js->irq <= 0) return -ENODEV; diff --git a/bsp/drivers/gpu/panfrost/panfrost_job.h b/bsp/drivers/gpu/panfrost/panfrost_job.h index 09ae9394fc..a3a2c6ce53 100644 --- a/bsp/drivers/gpu/panfrost/panfrost_job.h +++ b/bsp/drivers/gpu/panfrost/panfrost_job.h @@ -19,10 +19,6 @@ struct panfrost_job { struct panfrost_device *pfdev; struct panfrost_file_priv *file_priv; - /* Contains both explicit and implicit fences */ - struct xarray deps; - unsigned long last_dep; - /* Fence to be signaled by IRQ handler when the job is complete. */ struct dma_fence *done_fence; @@ -42,6 +38,7 @@ int panfrost_job_init(struct panfrost_device *pfdev); void panfrost_job_fini(struct panfrost_device *pfdev); int panfrost_job_open(struct panfrost_file_priv *panfrost_priv); void panfrost_job_close(struct panfrost_file_priv *panfrost_priv); +int panfrost_job_get_slot(struct panfrost_job *job); int panfrost_job_push(struct panfrost_job *job); void panfrost_job_put(struct panfrost_job *job); void panfrost_job_enable_interrupts(struct panfrost_device *pfdev); diff --git a/bsp/drivers/gpu/panfrost/panfrost_mmu.c b/bsp/drivers/gpu/panfrost/panfrost_mmu.c index 3f73f331a3..ce0c584b38 100644 --- a/bsp/drivers/gpu/panfrost/panfrost_mmu.c +++ b/bsp/drivers/gpu/panfrost/panfrost_mmu.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0 /* Copyright 2019 Linaro, Ltd, Rob Herring */ #include "panfrost_drm.h" @@ -58,21 +58,37 @@ static int write_cmd(struct panfrost_device *pfdev, u32 as_nr, u32 cmd) } static void lock_region(struct panfrost_device *pfdev, u32 as_nr, - u64 iova, u64 size) + u64 region_start, u64 size) { u8 region_width; - u64 region = iova & PAGE_MASK; + u64 region; + u64 region_end = region_start + size; - /* The size is encoded as ceil(log2) minus(1), which may be calculated - * with fls. The size must be clamped to hardware bounds. + if (!size) + return; + + /* + * The locked region is a naturally aligned power of 2 block encoded as + * log2 minus(1). + * Calculate the desired start/end and look for the highest bit which + * differs. The smallest naturally aligned block must include this bit + * change, the desired region starts with this bit (and subsequent bits) + * zeroed and ends with the bit (and subsequent bits) set to one. + */ + region_width = max(fls64(region_start ^ (region_end - 1)), + const_ilog2(AS_LOCK_REGION_MIN_SIZE)) - 1; + + /* + * Mask off the low bits of region_start (which would be ignored by + * the hardware anyway) */ - size = max_t(u64, size, AS_LOCK_REGION_MIN_SIZE); - region_width = fls64(size - 1) - 1; - region |= region_width; + region_start &= GENMASK_ULL(63, region_width); + + region = region_width | region_start; /* Lock the region that needs to be updated */ - mmu_write(pfdev, AS_LOCKADDR_LO(as_nr), region & 0xFFFFFFFFUL); - mmu_write(pfdev, AS_LOCKADDR_HI(as_nr), (region >> 32) & 0xFFFFFFFFUL); + mmu_write(pfdev, AS_LOCKADDR_LO(as_nr), lower_32_bits(region)); + mmu_write(pfdev, AS_LOCKADDR_HI(as_nr), upper_32_bits(region)); write_cmd(pfdev, as_nr, AS_COMMAND_LOCK); } @@ -114,14 +130,14 @@ static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_m mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); - mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), transtab & 0xffffffffUL); - mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), transtab >> 32); + mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), lower_32_bits(transtab)); + mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), upper_32_bits(transtab)); /* Need to revisit mem attrs. * NC is the default, Mali driver is inner WT. */ - mmu_write(pfdev, AS_MEMATTR_LO(as_nr), memattr & 0xffffffffUL); - mmu_write(pfdev, AS_MEMATTR_HI(as_nr), memattr >> 32); + mmu_write(pfdev, AS_MEMATTR_LO(as_nr), lower_32_bits(memattr)); + mmu_write(pfdev, AS_MEMATTR_HI(as_nr), upper_32_bits(memattr)); write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE); } @@ -253,7 +269,7 @@ static void panfrost_mmu_flush_range(struct panfrost_device *pfdev, if (pm_runtime_active(pfdev->dev)) mmu_hw_do_operation(pfdev, mmu, iova, size, AS_COMMAND_FLUSH_PT); - pm_runtime_put_autosuspend(pfdev->dev); + pm_runtime_put_sync_autosuspend(pfdev->dev); } static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu, @@ -288,7 +304,8 @@ static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu, int panfrost_mmu_map(struct panfrost_gem_mapping *mapping) { struct panfrost_gem_object *bo = mapping->obj; - struct drm_gem_object *obj = &bo->base.base; + struct drm_gem_shmem_object *shmem = &bo->base; + struct drm_gem_object *obj = &shmem->base; struct panfrost_device *pfdev = to_panfrost_device(obj->dev); struct sg_table *sgt; int prot = IOMMU_READ | IOMMU_WRITE; @@ -298,11 +315,8 @@ int panfrost_mmu_map(struct panfrost_gem_mapping *mapping) if (bo->noexec) prot |= IOMMU_NOEXEC; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 147)) - sgt = drm_gem_shmem_get_pages_sgt(&bo->base); -#else - sgt = drm_gem_shmem_get_pages_sgt(obj); -#endif + + sgt = drm_gem_shmem_get_pages_sgt(shmem); if (WARN_ON(IS_ERR(sgt))) return PTR_ERR(sgt); @@ -351,7 +365,7 @@ static void mmu_tlb_inv_context_s1(void *cookie) static void mmu_tlb_sync_context(void *cookie) { - // struct panfrost_mmu *mmu = cookie; + //struct panfrost_mmu *mmu = cookie; // TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X } @@ -471,7 +485,6 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, if (IS_ERR(pages[i])) { mutex_unlock(&bo->base.pages_lock); ret = PTR_ERR(pages[i]); - pages[i] = NULL; goto err_pages; } } @@ -505,7 +518,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, err_pages: drm_gem_shmem_put_pages(&bo->base); err_bo: - panfrost_gem_mapping_put(bomapping); + drm_gem_object_put(&bo->base.base); return ret; } @@ -720,7 +733,7 @@ int panfrost_mmu_init(struct panfrost_device *pfdev) { int err, irq; - irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "MMU"); + irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "mmu"); if (irq <= 0) return -ENODEV; diff --git a/bsp/drivers/gpu/panfrost/panfrost_perfcnt.c b/bsp/drivers/gpu/panfrost/panfrost_perfcnt.c index b2a0317ef8..b6fed83576 100644 --- a/bsp/drivers/gpu/panfrost/panfrost_perfcnt.c +++ b/bsp/drivers/gpu/panfrost/panfrost_perfcnt.c @@ -12,6 +12,7 @@ #endif #include +#include #include #include #include @@ -56,8 +57,8 @@ static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev) reinit_completion(&pfdev->perfcnt->dump_comp); gpuva = pfdev->perfcnt->mapping->mmnode.start << PAGE_SHIFT; - gpu_write(pfdev, GPU_PERFCNT_BASE_LO, gpuva); - gpu_write(pfdev, GPU_PERFCNT_BASE_HI, gpuva >> 32); + gpu_write(pfdev, GPU_PERFCNT_BASE_LO, lower_32_bits(gpuva)); + gpu_write(pfdev, GPU_PERFCNT_BASE_HI, upper_32_bits(gpuva)); gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_CLEAN_CACHES_COMPLETED | GPU_IRQ_PERFCNT_SAMPLE_COMPLETED); diff --git a/bsp/drivers/gpu/panfrost/panfrost_regs.h b/bsp/drivers/gpu/panfrost/panfrost_regs.h index 6c5a11ef1e..accb4fa3ad 100644 --- a/bsp/drivers/gpu/panfrost/panfrost_regs.h +++ b/bsp/drivers/gpu/panfrost/panfrost_regs.h @@ -195,6 +195,7 @@ #define SC_TLS_HASH_ENABLE BIT(17) #define SC_LS_ATTR_CHECK_DISABLE BIT(18) #define SC_ENABLE_TEXGRD_FLAGS BIT(25) +#define SC_VAR_ALGORITHM BIT(29) /* End SHADER_CONFIG register */ /* TILER_CONFIG register */ @@ -208,6 +209,7 @@ #define JM_MAX_JOB_THROTTLE_LIMIT 0x3F #define JM_FORCE_COHERENCY_FEATURES_SHIFT 2 #define JM_IDVS_GROUP_SIZE_SHIFT 16 +#define JM_DEFAULT_IDVS_GROUP_SIZE 0xF #define JM_MAX_IDVS_GROUP_SIZE 0x3F @@ -292,7 +294,7 @@ #define AS_FAULTADDRESS_LO(as) (MMU_AS(as) + 0x20) /* (RO) Fault Address for address space n, low word */ #define AS_FAULTADDRESS_HI(as) (MMU_AS(as) + 0x24) /* (RO) Fault Address for address space n, high word */ #define AS_STATUS(as) (MMU_AS(as) + 0x28) /* (RO) Status flags for address space n */ -/* Additional Bifrost AS regsiters */ +/* Additional Bifrost AS registers */ #define AS_TRANSCFG_LO(as) (MMU_AS(as) + 0x30) /* (RW) Translation table configuration for address space n, low word */ #define AS_TRANSCFG_HI(as) (MMU_AS(as) + 0x34) /* (RW) Translation table configuration for address space n, high word */ #define AS_FAULTEXTRA_LO(as) (MMU_AS(as) + 0x38) /* (RO) Secondary fault address for address space n, low word */