From 36cfd74a80ec04edb7f2195c6addbf518e6524d8 Mon Sep 17 00:00:00 2001 From: Valtteri Koskivuori Date: Sat, 4 Nov 2023 04:06:48 +0200 Subject: [PATCH] Only allocate small work buffers in worker nodes Previously, all worker nodes allocated a buffer with the full image dimensions, which could be very big. Instead, allocate that only on the master node, and use small textures in worker node render threads, since worker nodes never need anything more than that anyway. --- src/datatypes/image/texture.c | 8 ++++++++ src/datatypes/image/texture.h | 2 ++ src/datatypes/scene.c | 8 -------- src/renderer/renderer.c | 6 +++++- src/utils/protocol/server.c | 1 + src/utils/protocol/worker.c | 20 +++++++------------- 6 files changed, 23 insertions(+), 22 deletions(-) diff --git a/src/datatypes/image/texture.c b/src/datatypes/image/texture.c index 41cbcd68..b28cf030 100644 --- a/src/datatypes/image/texture.c +++ b/src/datatypes/image/texture.c @@ -11,6 +11,7 @@ #include "texture.h" #include "../../utils/logging.h" #include "../../utils/assert.h" +#include <string.h> //General-purpose setPixel function void setPixel(struct texture *t, struct color c, size_t x, size_t y) { @@ -144,6 +145,13 @@ bool texture_uses_alpha(const struct texture *t) { return false; } +void tex_clear(struct texture *t) { + if (!t) return; + size_t prim_size = t->precision == char_p ? 
sizeof(char) : sizeof(float); + size_t bytes = t->width * t->height * t->channels * prim_size; + memset(t->data.byte_p, 0, bytes); +} + void destroyTexture(struct texture *t) { if (t) { free(t->data.byte_p); diff --git a/src/datatypes/image/texture.h b/src/datatypes/image/texture.h index cdcfb043..02ea0c57 100644 --- a/src/datatypes/image/texture.h +++ b/src/datatypes/image/texture.h @@ -55,6 +55,8 @@ void textureToSRGB(struct texture *t); bool texture_uses_alpha(const struct texture *t); +void tex_clear(struct texture *t); + /// Deallocate a given texture /// @param tex Texture to deallocate void destroyTexture(struct texture *tex); diff --git a/src/datatypes/scene.c b/src/datatypes/scene.c index cfebfc7b..24e61fd1 100644 --- a/src/datatypes/scene.c +++ b/src/datatypes/scene.c @@ -231,14 +231,6 @@ int loadScene(struct renderer *r, char *input) { for (size_t i = 0; i < r->state.tileCount; ++i) r->state.renderTiles[i].total_samples = r->prefs.sampleCount; - // Some of this stuff seems like it should be in newRenderer(), but notice - // how they depend on r->prefs, which is populated by parseJSON - - struct camera cam = r->scene->cameras[r->prefs.selected_camera]; - //Allocate memory for render buffer - //Render buffer is used to store accurate color values for the renderers' internal use - r->state.renderBuffer = newTexture(float_p, cam.width, cam.height, 3); - //Print a useful warning to user if the defined tile size results in less renderThreads if (r->state.tileCount < r->prefs.threads) { logr(warning, "WARNING: Rendering with a less than optimal thread count due to large tile size!\n"); diff --git a/src/renderer/renderer.c b/src/renderer/renderer.c index 0a01235c..608890a6 100644 --- a/src/renderer/renderer.c +++ b/src/renderer/renderer.c @@ -105,6 +105,10 @@ struct texture *renderFrame(struct renderer *r) { // Local render threads + one thread for every client size_t total_thread_count = r->prefs.threads + (int)r->state.clientCount; r->state.workers = 
calloc(total_thread_count, sizeof(*r->state.workers)); + + //Allocate memory for render buffer + //Render buffer is used to store accurate color values for the renderers' internal use + r->state.renderBuffer = newTexture(float_p, camera.width, camera.height, 3); //Create & boot workers (Nonblocking) for (int t = 0; t < (int)total_thread_count; ++t) { @@ -393,7 +397,7 @@ struct renderer *newRenderer() { void destroyRenderer(struct renderer *r) { if (r) { destroyScene(r->scene); - destroyTexture(r->state.renderBuffer); + if (r->state.renderBuffer) destroyTexture(r->state.renderBuffer); free(r->state.renderTiles); free(r->state.workers); free(r->state.tileMutex); diff --git a/src/utils/protocol/server.c b/src/utils/protocol/server.c index 4365348e..a3d884bb 100644 --- a/src/utils/protocol/server.c +++ b/src/utils/protocol/server.c @@ -171,6 +171,7 @@ static cJSON *processSubmitWork(struct worker *state, const cJSON *json) { for (int y = tile.end.y - 1; y > tile.begin.y - 1; --y) { for (int x = tile.begin.x; x < tile.end.x; ++x) { struct color value = textureGetPixel(tileImage, x - tile.begin.x, y - tile.begin.y, false); + value = colorToSRGB(value); setPixel(state->output, value, x, y); } } diff --git a/src/utils/protocol/worker.c b/src/utils/protocol/worker.c index 686693f1..160b360b 100644 --- a/src/utils/protocol/worker.c +++ b/src/utils/protocol/worker.c @@ -144,7 +144,7 @@ static void *workerThread(void *arg) { mutex_lock(sockMutex); thread->current = getWork(sock); mutex_release(sockMutex); - struct texture *tileBuffer = newTexture(char_p, thread->current->width, thread->current->height, 3); + struct texture *tileBuffer = newTexture(float_p, thread->current->width, thread->current->height, 3); sampler *sampler = newSampler(); struct camera *cam = thread->cam; @@ -155,7 +155,7 @@ static void *workerThread(void *arg) { while (thread->current && r->state.rendering) { if (tileBuffer->width != thread->current->width || tileBuffer->height != thread->current->height) { 
destroyTexture(tileBuffer); - tileBuffer = newTexture(char_p, thread->current->width, thread->current->height, 3); + tileBuffer = newTexture(float_p, thread->current->width, thread->current->height, 3); } long totalUsec = 0; long samples = 0; @@ -168,7 +168,9 @@ static void *workerThread(void *arg) { uint32_t pixIdx = (uint32_t)(y * cam->width + x); initSampler(sampler, SAMPLING_STRATEGY, thread->completedSamples - 1, r->prefs.sampleCount, pixIdx); - struct color output = textureGetPixel(r->state.renderBuffer, x, y, false); + int local_x = x - thread->current->begin.x; + int local_y = y - thread->current->begin.y; + struct color output = textureGetPixel(tileBuffer, local_x, local_y, false); struct color sample = path_trace(cam_get_ray(cam, x, y, sampler), r->scene, r->prefs.bounces, sampler); nan_clamp(&sample, &output); @@ -179,16 +181,7 @@ static void *workerThread(void *arg) { float t = 1.0f / thread->completedSamples; output = colorCoef(t, output); - //Store internal render buffer (float precision) - setPixel(r->state.renderBuffer, output, x, y); - - //Gamma correction - output = colorToSRGB(output); - - //And store the image data - int localX = x - thread->current->begin.x; - int localY = y - thread->current->begin.y; - setPixel(tileBuffer, output, localX, localY); + setPixel(tileBuffer, output, local_x, local_y); } } //For performance metrics @@ -215,6 +208,7 @@ static void *workerThread(void *arg) { mutex_lock(sockMutex); thread->current = getWork(sock); mutex_release(sockMutex); + tex_clear(tileBuffer); } bail: destroySampler(sampler);