Skip to content

Commit

Permalink
drm/vc4: Cache LBM allocations to avoid double-buffering
Browse files Browse the repository at this point in the history
LBM is only relevant for each active dlist, so there is
no need to double-buffer the allocations.

Cache the allocations per plane so that we can ensure the
allocations are possible.

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
  • Loading branch information
6by9 committed Sep 27, 2024
1 parent be6bba7 commit 21bec0f
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 19 deletions.
18 changes: 17 additions & 1 deletion drivers/gpu/drm/vc4/vc4_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,19 @@ struct vc4_v3d {
struct debugfs_regset32 regset;
};

/* Highest LBM handle value. Handles are allocated from hvs->lbm_handles
 * in the range 1..VC4_NUM_LBM_HANDLES; a handle of 0 is treated by the
 * plane code as "no LBM allocation".
 */
#define VC4_NUM_LBM_HANDLES 64
/* Bookkeeping for one LBM allocation. Entries live in
 * vc4_hvs.lbm_refcounts and are indexed by LBM handle, which lets
 * several plane states refer to the same allocation.
 */
struct vc4_lbm_refcounts {
/* Number of plane states currently holding this handle. */
refcount_t refcount;

/* Allocation size, as requested when the handle was created. It is
 * compared against the newly required size to decide whether an
 * existing allocation can be kept.
 */
size_t size;
/* Our allocation in LBM. */
struct drm_mm_node lbm;

/* Pointer back to the HVS structure.
 * NOTE(review): not assigned anywhere in the visible code - confirm
 * it is initialised before relying on it.
 */
struct vc4_hvs *hvs;
};

#define VC4_NUM_UPM_HANDLES 32
struct vc4_upm_refcounts {
refcount_t refcount;
Expand Down Expand Up @@ -358,8 +371,11 @@ struct vc4_hvs {
* list. Units are dwords.
*/
struct drm_mm dlist_mm;

/* Memory manager for the LBM memory used by HVS scaling. */
struct drm_mm lbm_mm;
struct ida lbm_handles;
struct vc4_lbm_refcounts lbm_refcounts[VC4_NUM_LBM_HANDLES + 1];

/* Memory manager for the UPM memory used for prefetching. */
struct drm_mm upm_mm;
Expand Down Expand Up @@ -459,7 +475,7 @@ struct vc4_plane_state {
bool is_yuv;

/* Our allocation in LBM for temporary storage during scaling. */
struct drm_mm_node lbm;
unsigned int lbm_handle;

/* Our allocation in UPM for prefetching. */
struct drm_mm_node upm[DRM_FORMAT_MAX_PLANES];
Expand Down
24 changes: 24 additions & 0 deletions drivers/gpu/drm/vc4/vc4_hvs.c
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,27 @@ static int vc6_hvs_debugfs_upm_allocs(struct seq_file *m, void *data)
return 0;
}

/*
 * debugfs show callback for "hvs_lbm": print one line per slot of the LBM
 * handle table, giving the reference count, the recorded allocation size
 * and the start/size of the backing drm_mm node.
 *
 * @m:    seq_file whose ->private is the drm_debugfs_entry for this file.
 * @data: unused.
 *
 * Always returns 0.
 */
static int vc4_hvs_debugfs_lbm_allocs(struct seq_file *m, void *data)
{
	struct drm_debugfs_entry *entry = m->private;
	struct drm_device *dev = entry->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct drm_printer p = drm_seq_file_printer(m);
	struct vc4_lbm_refcounts *refcount;
	unsigned int i;

	drm_printf(&p, "LBM Handles:\n");
	/* lbm_refcounts[] has VC4_NUM_LBM_HANDLES + 1 slots and handles are
	 * handed out in 1..VC4_NUM_LBM_HANDLES inclusive, so the upper bound
	 * has to be inclusive or the highest handle is never reported.
	 * Slot 0 is the "no handle" value and is never allocated.
	 */
	for (i = 0; i <= VC4_NUM_LBM_HANDLES; i++) {
		refcount = &hvs->lbm_refcounts[i];
		drm_printf(&p, "handle %u: refcount %u, size %zu [%08llx + %08llx]\n",
			   i, refcount_read(&refcount->refcount), refcount->size,
			   refcount->lbm.start, refcount->lbm.size);
	}

	return 0;
}

/* The filter kernel is composed of dwords each containing 3 9-bit
* signed integers packed next to each other.
*/
Expand Down Expand Up @@ -1714,6 +1735,8 @@ int vc4_hvs_debugfs_init(struct drm_minor *minor)
drm_debugfs_add_file(drm, "hvs_dlists", vc4_hvs_debugfs_dlist, NULL);
}

drm_debugfs_add_file(drm, "hvs_lbm", vc4_hvs_debugfs_lbm_allocs, NULL);

drm_debugfs_add_file(drm, "hvs_underrun", vc4_hvs_debugfs_underrun, NULL);

drm_debugfs_add_file(drm, "hvs_dlist_allocs", vc4_hvs_debugfs_dlist_allocs, NULL);
Expand Down Expand Up @@ -1819,6 +1842,7 @@ struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4,
}

drm_mm_init(&hvs->lbm_mm, 0, lbm_size);
ida_init(&hvs->lbm_handles);

if (vc4->gen >= VC4_GEN_6) {
ida_init(&hvs->upm_handles);
Expand Down
99 changes: 81 additions & 18 deletions drivers/gpu/drm/vc4/vc4_plane.c
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,8 @@ struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
if (vc4_state->upm_handle[i])
refcount_inc(&hvs->upm_refcounts[vc4_state->upm_handle[i]].refcount);
}

memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
if (vc4_state->lbm_handle)
refcount_inc(&hvs->lbm_refcounts[vc4_state->lbm_handle].refcount);

vc4_state->dlist_initialized = 0;

Expand All @@ -315,6 +315,21 @@ struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
return &vc4_state->base;
}

void vc4_plane_release_lbm_ida(struct vc4_hvs *hvs, unsigned int lbm_handle)
{
struct vc4_lbm_refcounts *refcount = &hvs->lbm_refcounts[lbm_handle];
unsigned long irqflags;

spin_lock_irqsave(&hvs->mm_lock, irqflags);
drm_mm_remove_node(&refcount->lbm);
spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
refcount->lbm.start = 0;
refcount->lbm.size = 0;
refcount->size = 0;

ida_free(&hvs->lbm_handles, lbm_handle);
}

void vc4_plane_release_upm_ida(struct vc4_hvs *hvs, unsigned int upm_handle)
{
struct vc4_upm_refcounts *refcount = &hvs->upm_refcounts[upm_handle];
Expand All @@ -338,12 +353,13 @@ void vc4_plane_destroy_state(struct drm_plane *plane,
struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
unsigned int i;

if (drm_mm_node_allocated(&vc4_state->lbm)) {
unsigned long irqflags;
if (vc4_state->lbm_handle) {
struct vc4_lbm_refcounts *refcount;

spin_lock_irqsave(&hvs->mm_lock, irqflags);
drm_mm_remove_node(&vc4_state->lbm);
spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
refcount = &hvs->lbm_refcounts[vc4_state->lbm_handle];

if (refcount_dec_and_test(&refcount->refcount))
vc4_plane_release_lbm_ida(hvs, vc4_state->lbm_handle);
}

for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
Expand Down Expand Up @@ -922,10 +938,14 @@ static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
{
struct drm_device *drm = state->plane->dev;
struct vc4_dev *vc4 = to_vc4_dev(drm);
struct vc4_hvs *hvs = vc4->hvs;
struct drm_plane *plane = state->plane;
struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
struct vc4_lbm_refcounts *refcount;
unsigned long irqflags;
int lbm_handle;
u32 lbm_size;
int ret;

lbm_size = vc4_lbm_size(state);
if (!lbm_size)
Expand All @@ -949,29 +969,71 @@ static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
/* Allocate the LBM memory that the HVS will use for temporary
* storage due to our scaling/format conversion.
*/
if (!drm_mm_node_allocated(&vc4_state->lbm)) {
int ret;
lbm_handle = vc4_state->lbm_handle;
if (lbm_handle &&
hvs->lbm_refcounts[lbm_handle].size == lbm_size) {
/* Allocation is the same size as the previous user of
* the plane. Keep the allocation.
*/
vc4_state->lbm_handle = lbm_handle;
} else {
if (lbm_handle &&
refcount_dec_and_test(&hvs->lbm_refcounts[lbm_handle].refcount)) {
vc4_plane_release_lbm_ida(hvs, lbm_handle);
vc4_state->lbm_handle = 0;
}

lbm_handle = ida_alloc_range(&hvs->lbm_handles, 1,
VC4_NUM_LBM_HANDLES,
GFP_KERNEL);
if (lbm_handle < 0) {
drm_err(drm, "Out of lbm_handles\n");
return lbm_handle;
}
vc4_state->lbm_handle = lbm_handle;

refcount = &hvs->lbm_refcounts[lbm_handle];
refcount_set(&refcount->refcount, 1);
refcount->size = lbm_size;

spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
spin_lock_irqsave(&hvs->mm_lock, irqflags);
ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
&vc4_state->lbm,
&refcount->lbm,
lbm_size, 1,
0, 0);
spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
spin_unlock_irqrestore(&hvs->mm_lock, irqflags);

if (ret) {
drm_err(drm, "Failed to allocate LBM entry: %d\n", ret);
refcount_set(&refcount->refcount, 0);
ida_free(&hvs->lbm_handles, lbm_handle);
vc4_state->lbm_handle = 0;
return ret;
}
} else {
WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
}

vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;
vc4_state->dlist[vc4_state->lbm_offset] = hvs->lbm_refcounts[lbm_handle].lbm.start;

return 0;
}

static void vc4_plane_free_lbm(struct drm_plane_state *state)
{
struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
struct drm_device *drm = state->plane->dev;
struct vc4_dev *vc4 = to_vc4_dev(drm);
struct vc4_hvs *hvs = vc4->hvs;
unsigned int lbm_handle;

lbm_handle = vc4_state->lbm_handle;
if (!lbm_handle)
return;

if (refcount_dec_and_test(&hvs->lbm_refcounts[lbm_handle].refcount))
vc4_plane_release_lbm_ida(hvs, lbm_handle);
vc4_state->lbm_handle = 0;
}

static int vc6_plane_allocate_upm(struct drm_plane_state *state)
{
const struct drm_format_info *info = state->fb->format;
Expand Down Expand Up @@ -2148,9 +2210,10 @@ int vc4_plane_atomic_check(struct drm_plane *plane,
struct drm_plane_state *old_plane_state =
drm_atomic_get_old_plane_state(state, plane);

if (vc4->gen >= VC4_GEN_6 && old_plane_state &&
plane_enabled(old_plane_state)) {
vc6_plane_free_upm(new_plane_state);
if (old_plane_state && plane_enabled(old_plane_state)) {
if (vc4->gen >= VC4_GEN_6)
vc6_plane_free_upm(new_plane_state);
vc4_plane_free_lbm(new_plane_state);
}
return 0;
}
Expand Down

0 comments on commit 21bec0f

Please sign in to comment.