drm/vc4: Rework UPM allocation to avoid double buffering #6385

Merged · 4 commits · Oct 10, 2024
8 changes: 7 additions & 1 deletion drivers/gpu/drm/vc4/tests/vc4_test_lbm_size.c
@@ -188,6 +188,7 @@ static void drm_vc4_test_vc4_lbm_size(struct kunit *test)
struct drm_framebuffer *fb;
struct drm_plane *plane;
struct drm_crtc *crtc;
struct vc4_dev *vc4;
unsigned int i;
int ret;

@@ -248,7 +249,12 @@ static void drm_vc4_test_vc4_lbm_size(struct kunit *test)
ret = drm_atomic_check_only(state);
KUNIT_ASSERT_EQ(test, ret, 0);

KUNIT_EXPECT_EQ(test, vc4_plane_state->lbm_size, params->expected_lbm_size);
vc4 = to_vc4_dev(state->dev);
KUNIT_ASSERT_NOT_NULL(test, vc4);
KUNIT_ASSERT_NOT_NULL(test, vc4->hvs);
KUNIT_EXPECT_EQ(test,
vc4->hvs->lbm_refcounts[vc4_plane_state->lbm_handle].size,
params->expected_lbm_size);

for (i = 0; i < 2; i++) {
KUNIT_EXPECT_EQ(test,
39 changes: 32 additions & 7 deletions drivers/gpu/drm/vc4/vc4_drv.h
@@ -322,6 +322,32 @@ struct vc4_v3d {
struct debugfs_regset32 regset;
};

#define VC4_NUM_LBM_HANDLES 64
struct vc4_lbm_refcounts {
refcount_t refcount;

/* Allocation size */
size_t size;
/* Our allocation in LBM. */
struct drm_mm_node lbm;

/* Pointer back to the HVS structure */
struct vc4_hvs *hvs;
};

#define VC4_NUM_UPM_HANDLES 32
struct vc4_upm_refcounts {
refcount_t refcount;

/* Allocation size */
size_t size;
/* Our allocation in UPM for prefetching. */
struct drm_mm_node upm;

/* Pointer back to the HVS structure */
struct vc4_hvs *hvs;
};

#define HVS_NUM_CHANNELS 3

struct vc4_hvs {
@@ -345,12 +371,16 @@ struct vc4_hvs {
* list. Units are dwords.
*/
struct drm_mm dlist_mm;

/* Memory manager for the LBM memory used by HVS scaling. */
struct drm_mm lbm_mm;
struct ida lbm_handles;
struct vc4_lbm_refcounts lbm_refcounts[VC4_NUM_LBM_HANDLES + 1];

/* Memory manager for the UPM memory used for prefetching. */
struct drm_mm upm_mm;
struct ida upm_handles;
struct vc4_upm_refcounts upm_refcounts[VC4_NUM_UPM_HANDLES + 1];

spinlock_t mm_lock;

@@ -419,8 +449,6 @@ struct vc4_plane_state {
u32 dlist_size; /* Number of dwords allocated for the display list */
u32 dlist_count; /* Number of used dwords in the display list. */

u32 lbm_size; /* LBM requirements for this plane */

/* Offset in the dlist to various words, for pageflip or
* cursor updates.
*/
@@ -446,8 +474,8 @@ struct vc4_plane_state {
bool is_unity;
bool is_yuv;

/* Our allocation in UPM for prefetching. */
struct drm_mm_node upm[DRM_FORMAT_MAX_PLANES];
/* Our allocation in LBM for temporary storage during scaling. */
unsigned int lbm_handle;

/* The Unified Pre-Fetcher Handle */
unsigned int upm_handle[DRM_FORMAT_MAX_PLANES];
@@ -640,9 +668,6 @@ struct vc4_crtc {
* access to that value.
*/
unsigned int current_hvs_channel;

/* @lbm: Our allocation in LBM for temporary storage during scaling. */
struct drm_mm_node lbm;
};

#define to_vc4_crtc(_crtc) \
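
The new definitions in vc4_drv.h replace the per-CRTC drm_mm_node with a table of refcounted handles: each LBM or UPM allocation is identified by an IDA-allocated handle, and the refcount table records its size and drm_mm node. A minimal sketch of how an allocator built on these fields might hand out a refcounted LBM handle is shown below; the helper name vc4_lbm_alloc_sketch, the use of mm_lock, and the error handling are illustrative assumptions, not code from this series.

/*
 * Illustrative sketch only (not part of this series): allocating a
 * refcounted LBM handle using the new vc4_hvs fields. The helper name
 * and locking are assumptions.
 */
static int vc4_lbm_alloc_sketch(struct vc4_hvs *hvs, size_t size)
{
	struct vc4_lbm_refcounts *refcount;
	unsigned long irqflags;
	int handle, ret;

	/* Reserve a handle; 0 is left unused so it can mean "no LBM". */
	handle = ida_alloc_range(&hvs->lbm_handles, 1,
				 VC4_NUM_LBM_HANDLES, GFP_KERNEL);
	if (handle < 0)
		return handle;

	refcount = &hvs->lbm_refcounts[handle];
	refcount->size = size;
	refcount->hvs = hvs;

	/* Back the handle with an actual range in the LBM memory manager. */
	spin_lock_irqsave(&hvs->mm_lock, irqflags);
	ret = drm_mm_insert_node_generic(&hvs->lbm_mm, &refcount->lbm,
					 size, 1, 0, 0);
	spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
	if (ret) {
		ida_free(&hvs->lbm_handles, handle);
		return ret;
	}

	refcount_set(&refcount->refcount, 1);

	return handle;
}

The refcount arrays being sized VC4_NUM_LBM_HANDLES + 1 and VC4_NUM_UPM_HANDLES + 1 suggests that handle 0 is kept as a "no allocation" sentinel, which is what the sketch assumes.
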
104 changes: 56 additions & 48 deletions drivers/gpu/drm/vc4/vc4_hvs.c
@@ -436,6 +436,48 @@ static int vc4_hvs_debugfs_dlist_allocs(struct seq_file *m, void *data)
return 0;
}

static int vc6_hvs_debugfs_upm_allocs(struct seq_file *m, void *data)
{
struct drm_debugfs_entry *entry = m->private;
struct drm_device *dev = entry->dev;
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_hvs *hvs = vc4->hvs;
struct drm_printer p = drm_seq_file_printer(m);
struct vc4_upm_refcounts *refcount;
unsigned int i;

drm_printf(&p, "UPM Handles:\n");
for (i = 0; i < VC4_NUM_UPM_HANDLES; i++) {
refcount = &hvs->upm_refcounts[i];
drm_printf(&p, "handle %u: refcount %u, size %zu [%08llx + %08llx]\n",
i, refcount_read(&refcount->refcount), refcount->size,
refcount->upm.start, refcount->upm.size);
}

return 0;
}

static int vc4_hvs_debugfs_lbm_allocs(struct seq_file *m, void *data)
{
struct drm_debugfs_entry *entry = m->private;
struct drm_device *dev = entry->dev;
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_hvs *hvs = vc4->hvs;
struct drm_printer p = drm_seq_file_printer(m);
struct vc4_lbm_refcounts *refcount;
unsigned int i;

drm_printf(&p, "LBM Handles:\n");
for (i = 0; i < VC4_NUM_LBM_HANDLES; i++) {
refcount = &hvs->lbm_refcounts[i];
drm_printf(&p, "handle %u: refcount %u, size %zu [%08llx + %08llx]\n",
i, refcount_read(&refcount->refcount), refcount->size,
refcount->lbm.start, refcount->lbm.size);
}

return 0;
}

/* The filter kernel is composed of dwords each containing 3 9-bit
* signed integers packed next to each other.
*/
@@ -1261,7 +1303,6 @@ int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state)
struct drm_plane *plane;
const struct drm_plane_state *plane_state;
u32 dlist_count = 0;
u32 lbm_count = 0;

/* The pixelvalve can only feed one encoder (and encoders are
* 1:1 with connectors.)
@@ -1270,8 +1311,6 @@ int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state)
return -EINVAL;

drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
const struct vc4_plane_state *vc4_plane_state =
to_vc4_plane_state(plane_state);
u32 plane_dlist_count = vc4_plane_dlist_size(plane_state);

drm_dbg_driver(dev, "[CRTC:%d:%s] Found [PLANE:%d:%s] with DLIST size: %u\n",
@@ -1280,7 +1319,6 @@ int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state)
plane_dlist_count);

dlist_count += plane_dlist_count;
lbm_count += vc4_plane_state->lbm_size;
}

dlist_count++; /* Account for SCALER_CTL0_END. */
@@ -1294,8 +1332,6 @@ int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state)

vc4_state->mm = alloc;

/* FIXME: Check total lbm allocation here */

return vc4_hvs_gamma_check(crtc, state);
}

@@ -1411,10 +1447,7 @@ void vc4_hvs_atomic_flush(struct drm_crtc *crtc,
bool debug_dump_regs = false;
bool enable_bg_fill = true;
u32 __iomem *dlist_start, *dlist_next;
unsigned long irqflags;
unsigned int zpos = 0;
u32 lbm_offset = 0;
u32 lbm_size = 0;
bool found = false;
int idx;

@@ -1433,35 +1466,6 @@ void vc4_hvs_atomic_flush(struct drm_crtc *crtc,
vc4_hvs_dump_state(hvs);
}

drm_atomic_crtc_for_each_plane(plane, crtc) {
vc4_plane_state = to_vc4_plane_state(plane->state);
lbm_size += vc4_plane_state->lbm_size;
}

if (drm_mm_node_allocated(&vc4_crtc->lbm)) {
spin_lock_irqsave(&vc4_crtc->irq_lock, irqflags);
drm_mm_remove_node(&vc4_crtc->lbm);
spin_unlock_irqrestore(&vc4_crtc->irq_lock, irqflags);
}

if (lbm_size) {
int ret;

spin_lock_irqsave(&vc4_crtc->irq_lock, irqflags);
ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
&vc4_crtc->lbm,
lbm_size, 1,
0, 0);
spin_unlock_irqrestore(&vc4_crtc->irq_lock, irqflags);

if (ret) {
pr_err("Failed to allocate LBM ret %d\n", ret);
return;
}
}

lbm_offset = vc4_crtc->lbm.start;

dlist_start = vc4->hvs->dlist + vc4_state->mm->mm_node.start;
dlist_next = dlist_start;

@@ -1473,8 +1477,6 @@ void vc4_hvs_atomic_flush(struct drm_crtc *crtc,
if (plane->state->normalized_zpos != zpos)
continue;

vc4_plane_state = to_vc4_plane_state(plane->state);

/* Is this the first active plane? */
if (dlist_next == dlist_start) {
/* We need to enable background fill when a plane
@@ -1485,15 +1487,10 @@ void vc4_hvs_atomic_flush(struct drm_crtc *crtc,
* already needs it or all planes on top blend from
* the first or a lower plane.
*/
vc4_plane_state = to_vc4_plane_state(plane->state);
enable_bg_fill = vc4_plane_state->needs_bg_fill;
}

if (vc4_plane_state->lbm_size) {
vc4_plane_state->dlist[vc4_plane_state->lbm_offset] =
lbm_offset;
lbm_offset += vc4_plane_state->lbm_size;
}

dlist_next += vc4_plane_write_dlist(plane, dlist_next);

found = true;
@@ -1731,10 +1728,14 @@ int vc4_hvs_debugfs_init(struct drm_minor *minor)
NULL);
}

if (vc4->gen >= VC4_GEN_6)
if (vc4->gen >= VC4_GEN_6) {
drm_debugfs_add_file(drm, "hvs_dlists", vc6_hvs_debugfs_dlist, NULL);
else
drm_debugfs_add_file(drm, "hvs_upm", vc6_hvs_debugfs_upm_allocs, NULL);
} else {
drm_debugfs_add_file(drm, "hvs_dlists", vc4_hvs_debugfs_dlist, NULL);
}

drm_debugfs_add_file(drm, "hvs_lbm", vc4_hvs_debugfs_lbm_allocs, NULL);

drm_debugfs_add_file(drm, "hvs_underrun", vc4_hvs_debugfs_underrun, NULL);

Expand All @@ -1754,6 +1755,7 @@ struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4,
unsigned int dlist_start;
size_t dlist_size;
size_t lbm_size;
unsigned int i;

hvs = drmm_kzalloc(drm, sizeof(*hvs), GFP_KERNEL);
if (!hvs)
@@ -1793,6 +1795,11 @@ struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4,
else
dlist_size = 4096;

for (i = 0; i < VC4_NUM_UPM_HANDLES; i++) {
refcount_set(&hvs->upm_refcounts[i].refcount, 0);
hvs->upm_refcounts[i].hvs = hvs;
}

break;

default:
@@ -1835,6 +1842,7 @@ struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4,
}

drm_mm_init(&hvs->lbm_mm, 0, lbm_size);
ida_init(&hvs->lbm_handles);

if (vc4->gen >= VC4_GEN_6) {
ida_init(&hvs->upm_handles);
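
__vc4_hvs_alloc() now initializes the UPM refcount table and both handle IDAs next to the existing drm_mm managers. The matching release path is not part of the hunks shown here; a hedged sketch of what dropping the last reference to an LBM handle could look like follows, with the helper name and locking again being assumptions.

/*
 * Illustrative sketch only (not part of the hunks above): releasing a
 * refcounted LBM handle once its last user is gone. The helper name
 * and locking are assumptions.
 */
static void vc4_lbm_put_sketch(struct vc4_hvs *hvs, unsigned int handle)
{
	struct vc4_lbm_refcounts *refcount = &hvs->lbm_refcounts[handle];
	unsigned long irqflags;

	if (!refcount_dec_and_test(&refcount->refcount))
		return;

	/* Last reference: return the LBM range and recycle the handle. */
	spin_lock_irqsave(&hvs->mm_lock, irqflags);
	if (drm_mm_node_allocated(&refcount->lbm))
		drm_mm_remove_node(&refcount->lbm);
	spin_unlock_irqrestore(&hvs->mm_lock, irqflags);

	refcount->size = 0;
	ida_free(&hvs->lbm_handles, handle);
}

Because a handle stays alive until the last plane state referencing it is destroyed, consecutive states can share the same allocation across a flip rather than carrying two copies, which appears to be how the series avoids the double buffering mentioned in the title.
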