test_multithreading_shared.hpp
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// This file is part of the AMD Render Pipeline Shaders SDK which is
// released under the AMD INTERNAL EVALUATION LICENSE.
//
// See file LICENSE.txt for full license details.
#pragma once

#include "utils/rps_test_host.hpp"
#include "app_framework/afx_threadpool.hpp"

RPS_DECLARE_RPSL_ENTRY(test_multithreading, mt_main);

#include <random>
#include <atomic>
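
// Inline HLSL used by the test: VSMain expands each draw into a single equilateral
// triangle positioned, scaled, and tinted from per-draw constants (a constant buffer on
// D3D, push constants on Vulkan); PSMain outputs the color.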
static const char c_Shader[] = R"(
struct V2P
{
    float4 Pos : SV_Position;
    float4 Color : COLOR0;
};

struct CBData
{
    float2 Pos;
    float Scale;
    float AspectRatio;
    float3 Color;
};

#ifndef VULKAN
ConstantBuffer<CBData> cb : register(b0);
#else
[[vk::push_constant]] CBData cb;
#endif

#define PI (3.14159f)

V2P VSMain(uint vId : SV_VertexID)
{
    float2 pos[3] =
    {
        { -0.5f * cb.AspectRatio, -0.5f * tan(PI / 6), },
        {  0.0f * cb.AspectRatio,  0.5f / cos(PI / 6), },
        {  0.5f * cb.AspectRatio, -0.5f * tan(PI / 6), },
    };

    V2P vsOut;
    vsOut.Pos   = float4(pos[min(vId, 2)] * cb.Scale + cb.Pos, 0, 1);
    vsOut.Color = float4(cb.Color, 1.0f);
    return vsOut;
}

float4 PSMain(V2P psIn) : SV_Target0
{
    return psIn.Color;
}
)";
#define TEST_APP_NAME_RAW "TestMultithreading"
using namespace DirectX;
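
// Catch2 assertion macros are not thread-safe, so assertions issued from worker threads
// are serialized through m_catch2Mutex.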
#define THREAD_SAFE_REQUIRE(EXPR)                            \
    do                                                       \
    {                                                        \
        std::lock_guard<std::mutex> lock(m_catch2Mutex);     \
        REQUIRE(EXPR);                                       \
    } while (false);

class TestRpsMultithreading : public RpsTestHost
{
public:
    static constexpr uint32_t MAX_THREADS = 8;
    static constexpr uint32_t MIN_THREADS = 4;
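
    // Per-triangle animation state kept on the CPU side.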
    struct TriangleDataCPU
    {
        XMFLOAT2 Offset;
        float    Scale;
        float    Speed;
        XMFLOAT3 Color;
    };
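
    // Per-draw constants uploaded to the GPU; layout matches CBData in c_Shader.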
    struct TriangleDataGPU
    {
        XMFLOAT2 Pos;
        float    Scale;
        float    AspectRatio;
        XMFLOAT3 Color;
    };
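
    // User context passed to rpsRenderGraphRecordCommands for each recorded range:
    // identifies the batch and the index of the command list the range records into.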
    struct CmdRangeContext
    {
        uint32_t BatchIndex;
        uint32_t LastCmdListIndex;
    };

public:
    TestRpsMultithreading()
    {
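        // Size the worker pool from std::thread::hardware_concurrency() (minus one when
        // available), clamped to the [MIN_THREADS, MAX_THREADS] range.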
        const uint32_t hwConcurrency = std::thread::hardware_concurrency();
        const uint32_t numThreads =
            std::min(MAX_THREADS, std::max(MIN_THREADS, (hwConcurrency > 0) ? (hwConcurrency - 1) : hwConcurrency));

        m_threadPool.Init(numThreads);
    }

protected:
    void Init(RpsDevice hRpsDevice)
    {
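        // Generate randomized per-triangle offsets, colors, sizes, and speeds. Release
        // builds scale the triangle count and runway length up unless running on a
        // software adapter.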
        uint32_t baseTriangles = 4096;

#ifndef _DEBUG
        m_speedMultiplier = IsSoftwareAdapter() ? 1 : 64;
#endif

        m_triangleData.resize(baseTriangles * m_speedMultiplier);
        m_runwayLength *= m_speedMultiplier;

        std::random_device                    rd;
        std::mt19937                          gen(rd());
        std::uniform_real_distribution<float> dist(0.0f, 1.0f);

        for (uint32_t i = 0; i < m_triangleData.size(); i++)
        {
            m_triangleData[i].Offset = XMFLOAT2((dist(gen) - 0.5f) * m_runwayLength, dist(gen) * 2.0f - 1.0f);
            m_triangleData[i].Color  = XMFLOAT3(dist(gen), dist(gen), dist(gen));
            m_triangleData[i].Scale  = dist(gen) * 0.1f + 0.1f;
            m_triangleData[i].Speed  = (dist(gen) + 0.5f) * 0.01f * m_speedMultiplier;
        }

        RpsTestHost::OnInit(hRpsDevice, rpsTestLoadRpslEntry(test_multithreading, mt_main));
    }
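
    // Updates the render graph with the current back buffer description, the pass count,
    // and the back buffer resource array.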
    virtual void UpdateRpsPipeline(uint64_t                frameIndex,
                                   uint64_t                completedFrameIndex,
                                   const RpsResourceDesc&  backBufferDesc,
                                   RpsRuntimeResource*     pBackBuffers)
    {
        const RpsRuntimeResource* argResources[] = {pBackBuffers};
        RpsConstant               argData[]      = {&backBufferDesc, &m_numPasses};

        RpsTestHost::OnUpdate(frameIndex, completedFrameIndex, _countof(argData), argData, argResources);
    }
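
    // Splits the render graph's single command batch into contiguous command ranges and
    // records each range on a worker thread into its own command buffer.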
    void OnRender(uint64_t frameIndex, uint32_t numThreads)
    {
        m_waitHandles.clear();
        m_executeCount = 0;
        m_failCount    = 0;

        RpsRenderGraphBatchLayout batchLayout = {};
        REQUIRE_RPS_OK(rpsRenderGraphGetBatchLayout(GetRpsRenderGraph(), &batchLayout));
        assert(batchLayout.numCmdBatches == 1);

        const uint32_t numCmds     = batchLayout.pCmdBatches[0].numCmds;
        const uint32_t batchCmdEnd = batchLayout.pCmdBatches[0].cmdBegin + batchLayout.pCmdBatches[0].numCmds;

        uint32_t cmdBegin         = 0;
        uint32_t cmdsPerThread    = (numCmds + numThreads - 1) / numThreads;
        uint32_t numThreadsActual = (numCmds + cmdsPerThread - 1) / cmdsPerThread;

        static constexpr uint32_t MAX_BATCHES = 32;

        std::pair<RpsRuntimeCommandBuffer, uint32_t> buffers[MAX_BATCHES];

        uint32_t lastCmdListId = UINT32_MAX;
        for (uint32_t iBatch = 0; iBatch < numThreadsActual; iBatch++)
        {
            buffers[iBatch].first  = AcquireNewCommandBuffer(&lastCmdListId);
            buffers[iBatch].second = lastCmdListId;
        }
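
        // Enqueue one recording job per range; each job records its command range into
        // the command buffer acquired for it above.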
        for (uint32_t iBatch = 0; iBatch < numThreadsActual; iBatch++)
        {
            const uint32_t cmdEnd = std::min(batchCmdEnd, cmdBegin + cmdsPerThread);
            if (cmdEnd == cmdBegin)
            {
                break;
            }

            RpsAfxThreadPool::WaitHandle waitHdl =
                m_threadPool.EnqueueJob([this, iBatch, buffer = buffers[iBatch], frameIndex, cmdBegin, cmdEnd]() {
                    CmdRangeContext rangeContext = {iBatch, buffer.second};

                    RpsRenderGraphRecordCommandInfo recordInfo = {};
                    recordInfo.hCmdBuffer    = buffer.first;
                    recordInfo.pUserContext  = &rangeContext;
                    recordInfo.frameIndex    = frameIndex;
                    recordInfo.cmdBeginIndex = cmdBegin;
                    recordInfo.numCmds       = cmdEnd - cmdBegin;

                    RpsResult threadResult = rpsRenderGraphRecordCommands(GetRpsRenderGraph(), &recordInfo);
                    THREAD_SAFE_REQUIRE(threadResult == RPS_OK);
                });

            {
                std::lock_guard<std::mutex> lock(m_cmdListsMutex);
                m_waitHandles.emplace_back(std::move(waitHdl));
            }

            cmdBegin = cmdEnd;
        }

        m_threadPool.WaitIdle();

        REQUIRE(m_failCount == 0);
        REQUIRE((m_executeCount + numThreadsActual) == m_waitHandles.size());
    }

protected:
    virtual bool IsSoftwareAdapter() const
    {
        return false;
    }
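
    // Backend-specific hooks implemented by the API-specific test classes.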
    virtual RpsRuntimeCommandBuffer AcquireNewCommandBuffer(uint32_t* pInsertAfter) = 0;
    virtual void                    DrawGeometryPass(const RpsCmdCallbackContext* pContext) = 0;
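
    // Binds the RPSL node "GeometryPass" to DrawGeometryPass. RPS_CMD_CALLBACK_CUSTOM_ALL
    // indicates the callback performs all of the node's command setup itself.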
    virtual void BindNodes(RpsSubprogram hRpslEntry) override
    {
        RpsResult result = RPS_OK;

        result = rpsProgramBindNode(
            hRpslEntry, "GeometryPass", &TestRpsMultithreading::DrawGeometryPass, this, RPS_CMD_CALLBACK_CUSTOM_ALL);
        REQUIRE(result == RPS_OK);
    }

protected:
    uint32_t                     m_numPasses       = 4;
    std::vector<TriangleDataCPU> m_triangleData;
    float                        m_runwayLength    = 15.0f;
    int32_t                      m_speedMultiplier = 1;

    RpsAfxThreadPool                          m_threadPool;
    uint32_t                                  m_renderJobs = 8;
    std::vector<RpsAfxThreadPool::WaitHandle> m_waitHandles;

    std::atomic_int32_t m_failCount    = {};
    std::atomic_int32_t m_executeCount = {};

    std::mutex m_cmdListsMutex;
    std::mutex m_catch2Mutex;
};