From d1b2dd22c359aefc124f17da41509c4c0328b09b Mon Sep 17 00:00:00 2001
From: Nathanne Isip
Date: Sun, 29 Sep 2024 04:38:00 +0800
Subject: [PATCH] Implementation of added mathematical vector operations.

Add element-wise sub, div, mul, rem, bitwiseAnd, bitwiseOr, bitwiseXor,
shiftLeft and shiftRight to ZhivoUtil::VectorMath. Every operation throws
std::invalid_argument when the two operands differ in size, launches a
CUDA kernel when compiled with __CUDACC__, and otherwise runs an OpenMP
parallel loop. rem, the bitwise operations and the shifts cast both
operands to long before applying the integer operator and cast the
result back to double.

vector2Object now fills its result with a plain sequential loop instead
of an OpenMP loop whose body was serialized by a mutex.
---
Review notes:
- shiftRight launches cudaVectorShiftRight; no kernel named
  cudaVectorShift is visible in this patch.
- cudaVectorRem applies % so the CUDA path agrees with the host rem loop.
- vector2Object assigns the DynamicObject temporary directly; wrapping a
  temporary in std::move is redundant.

 src/util/VectorMath.cpp | 454 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 443 insertions(+), 11 deletions(-)

diff --git a/src/util/VectorMath.cpp b/src/util/VectorMath.cpp
index e5e5bd3..f49da0b 100644
--- a/src/util/VectorMath.cpp
+++ b/src/util/VectorMath.cpp
@@ -18,21 +18,12 @@
 
 #include
 
-#include
-
 namespace ZhivoUtil {
 
 DynamicObject vector2Object(const std::vector& vec) {
     std::vector objects(vec.size());
-    std::mutex mutex;
-
-    #pragma omp parallel for
-    for(size_t i = 0; i < vec.size(); ++i) {
-        std::lock_guard lock(mutex);
-        DynamicObject obj(vec[i]);
-
-        objects[i] = std::move(obj);
-    }
+    for(size_t i = 0; i < vec.size(); ++i)
+        objects[i] = DynamicObject(vec[i]);
 
     return DynamicObject(
         std::make_shared>(std::move(objects))
@@ -89,6 +80,348 @@ std::vector ZhivoUtil::VectorMath::add(
     return result;
 }
 
+std::vector ZhivoUtil::VectorMath::sub(
+    std::vector left,
+    std::vector right
+) {
+    size_t size = left.size();
+    if(size != right.size())
+        throw std::invalid_argument("Vectors must be of the same size.");
+
+    std::vector result(size);
+
+    #ifdef __CUDACC__
+
+    double *d_left, *d_right, *d_result;
+    cudaMalloc(&d_left, size * sizeof(double));
+    cudaMalloc(&d_right, size * sizeof(double));
+    cudaMalloc(&d_result, size * sizeof(double));
+
+    cudaMemcpy(d_left, left.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+    cudaMemcpy(d_right, right.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+
+    cudaVectorSub<<<(size + 255) / 256, 256>>>(d_left, d_right, d_result, size);
+    cudaMemcpy(result.data(), d_result, size * sizeof(double), cudaMemcpyDeviceToHost);
+
+    cudaFree(d_left);
+    cudaFree(d_right);
+    cudaFree(d_result);
+
+    #else
+
+    #pragma omp parallel for
+    for(size_t i = 0; i < size; ++i)
+        result[i] = left[i] - right[i];
+
+    #endif
+
+    return result;
+}
+
+std::vector ZhivoUtil::VectorMath::div(
+    std::vector left,
+    std::vector right
+) {
+    size_t size = left.size();
+    if(size != right.size())
+        throw std::invalid_argument("Vectors must be of the same size.");
+
+    std::vector result(size);
+
+    #ifdef __CUDACC__
+
+    double *d_left, *d_right, *d_result;
+    cudaMalloc(&d_left, size * sizeof(double));
+    cudaMalloc(&d_right, size * sizeof(double));
+    cudaMalloc(&d_result, size * sizeof(double));
+
+    cudaMemcpy(d_left, left.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+    cudaMemcpy(d_right, right.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+
+    cudaVectorDiv<<<(size + 255) / 256, 256>>>(d_left, d_right, d_result, size);
+    cudaMemcpy(result.data(), d_result, size * sizeof(double), cudaMemcpyDeviceToHost);
+
+    cudaFree(d_left);
+    cudaFree(d_right);
+    cudaFree(d_result);
+
+    #else
+
+    #pragma omp parallel for
+    for(size_t i = 0; i < size; ++i)
+        result[i] = left[i] / right[i];
+
+    #endif
+
+    return result;
+}
+
+std::vector ZhivoUtil::VectorMath::mul(
+    std::vector left,
+    std::vector right
+) {
+    size_t size = left.size();
+    if(size != right.size())
+        throw std::invalid_argument("Vectors must be of the same size.");
+
+    std::vector result(size);
+
+    #ifdef __CUDACC__
+
+    double *d_left, *d_right, *d_result;
+    cudaMalloc(&d_left, size * sizeof(double));
+    cudaMalloc(&d_right, size * sizeof(double));
+    cudaMalloc(&d_result, size * sizeof(double));
+
+    cudaMemcpy(d_left, left.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+    cudaMemcpy(d_right, right.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+
+    cudaVectorMul<<<(size + 255) / 256, 256>>>(d_left, d_right, d_result, size);
+    cudaMemcpy(result.data(), d_result, size * sizeof(double), cudaMemcpyDeviceToHost);
+
+    cudaFree(d_left);
+    cudaFree(d_right);
+    cudaFree(d_result);
+
+    #else
+
+    #pragma omp parallel for
+    for(size_t i = 0; i < size; ++i)
+        result[i] = left[i] * right[i];
+
+    #endif
+
+    return result;
+}
+
+std::vector ZhivoUtil::VectorMath::rem(
+    std::vector left,
+    std::vector right
+) {
+    size_t size = left.size();
+    if(size != right.size())
+        throw std::invalid_argument("Vectors must be of the same size.");
+
+    std::vector result(size);
+
+    #ifdef __CUDACC__
+
+    double *d_left, *d_right, *d_result;
+    cudaMalloc(&d_left, size * sizeof(double));
+    cudaMalloc(&d_right, size * sizeof(double));
+    cudaMalloc(&d_result, size * sizeof(double));
+
+    cudaMemcpy(d_left, left.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+    cudaMemcpy(d_right, right.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+
+    cudaVectorRem<<<(size + 255) / 256, 256>>>(d_left, d_right, d_result, size);
+    cudaMemcpy(result.data(), d_result, size * sizeof(double), cudaMemcpyDeviceToHost);
+
+    cudaFree(d_left);
+    cudaFree(d_right);
+    cudaFree(d_result);
+
+    #else
+
+    #pragma omp parallel for
+    for(size_t i = 0; i < size; ++i)
+        result[i] = (double) ((long) left[i] % (long) right[i]);
+
+    #endif
+
+    return result;
+}
+
+std::vector ZhivoUtil::VectorMath::bitwiseAnd(
+    std::vector left,
+    std::vector right
+) {
+    size_t size = left.size();
+    if(size != right.size())
+        throw std::invalid_argument("Vectors must be of the same size.");
+
+    std::vector result(size);
+
+    #ifdef __CUDACC__
+
+    double *d_left, *d_right, *d_result;
+    cudaMalloc(&d_left, size * sizeof(double));
+    cudaMalloc(&d_right, size * sizeof(double));
+    cudaMalloc(&d_result, size * sizeof(double));
+
+    cudaMemcpy(d_left, left.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+    cudaMemcpy(d_right, right.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+
+    cudaVectorBitwiseAnd<<<(size + 255) / 256, 256>>>(d_left, d_right, d_result, size);
+    cudaMemcpy(result.data(), d_result, size * sizeof(double), cudaMemcpyDeviceToHost);
+
+    cudaFree(d_left);
+    cudaFree(d_right);
+    cudaFree(d_result);
+
+    #else
+
+    #pragma omp parallel for
+    for(size_t i = 0; i < size; ++i)
+        result[i] = (double) ((long) left[i] & (long) right[i]);
+
+    #endif
+
+    return result;
+}
+
+std::vector ZhivoUtil::VectorMath::bitwiseOr(
+    std::vector left,
+    std::vector right
+) {
+    size_t size = left.size();
+    if(size != right.size())
+        throw std::invalid_argument("Vectors must be of the same size.");
+
+    std::vector result(size);
+
+    #ifdef __CUDACC__
+
+    double *d_left, *d_right, *d_result;
+    cudaMalloc(&d_left, size * sizeof(double));
+    cudaMalloc(&d_right, size * sizeof(double));
+    cudaMalloc(&d_result, size * sizeof(double));
+
+    cudaMemcpy(d_left, left.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+    cudaMemcpy(d_right, right.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+
+    cudaVectorBitwiseOr<<<(size + 255) / 256, 256>>>(d_left, d_right, d_result, size);
+    cudaMemcpy(result.data(), d_result, size * sizeof(double), cudaMemcpyDeviceToHost);
+
+    cudaFree(d_left);
+    cudaFree(d_right);
+    cudaFree(d_result);
+
+    #else
+
+    #pragma omp parallel for
+    for(size_t i = 0; i < size; ++i)
+        result[i] = (double) ((long) left[i] | (long) right[i]);
+
+    #endif
+
+    return result;
+}
+
+std::vector ZhivoUtil::VectorMath::bitwiseXor(
+    std::vector left,
+    std::vector right
+) {
+    size_t size = left.size();
+    if(size != right.size())
+        throw std::invalid_argument("Vectors must be of the same size.");
+
+    std::vector result(size);
+
+    #ifdef __CUDACC__
+
+    double *d_left, *d_right, *d_result;
+    cudaMalloc(&d_left, size * sizeof(double));
+    cudaMalloc(&d_right, size * sizeof(double));
+    cudaMalloc(&d_result, size * sizeof(double));
+
+    cudaMemcpy(d_left, left.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+    cudaMemcpy(d_right, right.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+
+    cudaVectorBitwiseXor<<<(size + 255) / 256, 256>>>(d_left, d_right, d_result, size);
+    cudaMemcpy(result.data(), d_result, size * sizeof(double), cudaMemcpyDeviceToHost);
+
+    cudaFree(d_left);
+    cudaFree(d_right);
+    cudaFree(d_result);
+
+    #else
+
+    #pragma omp parallel for
+    for(size_t i = 0; i < size; ++i)
+        result[i] = (double) ((long) left[i] ^ (long) right[i]);
+
+    #endif
+
+    return result;
+}
+
+std::vector ZhivoUtil::VectorMath::shiftLeft(
+    std::vector left,
+    std::vector right
+) {
+    size_t size = left.size();
+    if(size != right.size())
+        throw std::invalid_argument("Vectors must be of the same size.");
+
+    std::vector result(size);
+
+    #ifdef __CUDACC__
+
+    double *d_left, *d_right, *d_result;
+    cudaMalloc(&d_left, size * sizeof(double));
+    cudaMalloc(&d_right, size * sizeof(double));
+    cudaMalloc(&d_result, size * sizeof(double));
+
+    cudaMemcpy(d_left, left.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+    cudaMemcpy(d_right, right.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+
+    cudaVectorShiftLeft<<<(size + 255) / 256, 256>>>(d_left, d_right, d_result, size);
+    cudaMemcpy(result.data(), d_result, size * sizeof(double), cudaMemcpyDeviceToHost);
+
+    cudaFree(d_left);
+    cudaFree(d_right);
+    cudaFree(d_result);
+
+    #else
+
+    #pragma omp parallel for
+    for(size_t i = 0; i < size; ++i)
+        result[i] = (double) ((long) left[i] << (long) right[i]);
+
+    #endif
+
+    return result;
+}
+
+std::vector ZhivoUtil::VectorMath::shiftRight(
+    std::vector left,
+    std::vector right
+) {
+    size_t size = left.size();
+    if(size != right.size())
+        throw std::invalid_argument("Vectors must be of the same size.");
+
+    std::vector result(size);
+
+    #ifdef __CUDACC__
+
+    double *d_left, *d_right, *d_result;
+    cudaMalloc(&d_left, size * sizeof(double));
+    cudaMalloc(&d_right, size * sizeof(double));
+    cudaMalloc(&d_result, size * sizeof(double));
+
+    cudaMemcpy(d_left, left.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+    cudaMemcpy(d_right, right.data(), size * sizeof(double), cudaMemcpyHostToDevice);
+
+    cudaVectorShiftRight<<<(size + 255) / 256, 256>>>(d_left, d_right, d_result, size);
+    cudaMemcpy(result.data(), d_result, size * sizeof(double), cudaMemcpyDeviceToHost);
+
+    cudaFree(d_left);
+    cudaFree(d_right);
+    cudaFree(d_result);
+
+    #else
+
+    #pragma omp parallel for
+    for(size_t i = 0; i < size; ++i)
+        result[i] = (double) ((long) left[i] >> (long) right[i]);
+
+    #endif
+
+    return result;
+}
+
 #ifdef __CUDACC__
 static __global__ void cudaVectorAdd(
     const double* left,
@@ -101,6 +434,105 @@ static __global__ void cudaVectorAdd(
         result[idx] = left[idx] + right[idx];
 }
 
+static __global__ void cudaVectorSub(
+    const double* left,
+    const double* right,
+    double* result,
+    size_t size
+) {
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if(idx < size)
+        result[idx] = left[idx] - right[idx];
+}
+
+static __global__ void cudaVectorDiv(
+    const double* left,
+    const double* right,
+    double* result,
+    size_t size
+) {
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if(idx < size)
+        result[idx] = left[idx] / right[idx];
+}
+
+static __global__ void cudaVectorMul(
+    const double* left,
+    const double* right,
+    double* result,
+    size_t size
+) {
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if(idx < size)
+        result[idx] = left[idx] * right[idx];
+}
+
+static __global__ void cudaVectorRem(
+    const double* left,
+    const double* right,
+    double* result,
+    size_t size
+) {
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if(idx < size)
+        result[idx] = (double) ((long) left[idx] % (long) right[idx]);
+}
+
+static __global__ void cudaVectorBitwiseAnd(
+    const double* left,
+    const double* right,
+    double* result,
+    size_t size
+) {
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if(idx < size)
+        result[idx] = (double) ((long) left[idx] & (long) right[idx]);
+}
+
+static __global__ void cudaVectorBitwiseOr(
+    const double* left,
+    const double* right,
+    double* result,
+    size_t size
+) {
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if(idx < size)
+        result[idx] = (double) ((long) left[idx] | (long) right[idx]);
+}
+
+static __global__ void cudaVectorBitwiseXor(
+    const double* left,
+    const double* right,
+    double* result,
+    size_t size
+) {
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if(idx < size)
+        result[idx] = (double) ((long) left[idx] ^ (long) right[idx]);
+}
+
+static __global__ void cudaVectorShiftLeft(
+    const double* left,
+    const double* right,
+    double* result,
+    size_t size
+) {
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if(idx < size)
+        result[idx] = (double) ((long) left[idx] << (long) right[idx]);
+}
+
+static __global__ void cudaVectorShiftRight(
+    const double* left,
+    const double* right,
+    double* result,
+    size_t size
+) {
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if(idx < size)
+        result[idx] = (double) ((long) left[idx] >> (long) right[idx]);
+}
+
 #endif
 
 }