From ade9069bd16e97aad1d57b091ca8e1bcd76a57f3 Mon Sep 17 00:00:00 2001
From: Nathanne Isip
Date: Tue, 29 Oct 2024 07:55:00 +0800
Subject: [PATCH] Initial implementation of Linear Regression functionalities on ml package path.

---
Review note: the file contents below were revised during review, so the
commit id above and the blob ids on the "index" lines are stale.

 lib/zhvlib/ML.cc  | 243 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/zhvlib/ML.hpp |  32 +++++
 2 files changed, 275 insertions(+)
 create mode 100644 lib/zhvlib/ML.cc
 create mode 100644 lib/zhvlib/ML.hpp

diff --git a/lib/zhvlib/ML.cc b/lib/zhvlib/ML.cc
new file mode 100644
index 0000000..b077373
--- /dev/null
+++ b/lib/zhvlib/ML.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2024 - Nathanne Isip
+ * This file is part of Zhivo.
+ *
+ * Zhivo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License,
+ * or (at your option) any later version.
+ *
+ * Zhivo is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Zhivo. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "zhvlib/ML.hpp"
+
+#include <cmath>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+/*
+ * Converts every element of a script array into a double.
+ * Throws std::runtime_error when an element is not a number.
+ *
+ * Runs sequentially on purpose: an exception must not escape an
+ * OpenMP work-sharing loop, so the validation cannot be parallelized.
+ */
+static inline std::vector<double> arrayToDoubleVector(std::vector<DynamicObject>& array) {
+    std::vector<double> values;
+    values.reserve(array.size());
+
+    for(size_t i = 0; i < array.size(); i++) {
+        if(!array[i].isNumber())
+            throw std::runtime_error("Value from array is not a number");
+
+        values.push_back(array[i].getNumber());
+    }
+
+    return values;
+}
+
+/*
+ * Returns the arithmetic mean of the given values.
+ * The caller must pass a non-empty vector.
+ */
+static inline double calculateMean(const std::vector<double>& values) {
+    size_t valueCount = values.size();
+    double sum = 0.0;
+
+    #pragma omp parallel for reduction(+:sum)
+    for(size_t i = 0; i < valueCount; i++)
+        sum += values[i];
+
+    return sum / valueCount;
+}
+
+/*
+ * Validates a linear regression model object and extracts its
+ * coefficients. The model must be a 2-element array of numbers
+ * laid out as [slope, intercept].
+ */
+static inline void readLinearModel(DynamicObject model, double& slope, double& intercept) {
+    // Check the type first: getArray() is dereferenced below, which
+    // is only known to be valid for array objects.
+    if(!model.isArray())
+        throw std::runtime_error("Invalid linear regression model");
+
+    std::vector<DynamicObject> regModel = *model.getArray();
+    if(regModel.size() != 2)
+        throw std::runtime_error("Invalid linear regression model");
+
+    if(!regModel[0].isNumber() || !regModel[1].isNumber())
+        throw std::runtime_error("Linear regression model's slope and intercept must be a number");
+
+    slope = regModel[0].getNumber();
+    intercept = regModel[1].getNumber();
+}
+
+/*
+ * ml_trendline_calculate(x, y)
+ *
+ * Fits a least-squares line through the points (x[i], y[i]) and
+ * returns it as a 2-element array: [slope, intercept].
+ *
+ * Throws std::runtime_error when the argument count is wrong, when
+ * x or y is not an array of numbers, when their sizes differ, when
+ * the data set is empty, or when every x value is identical (the
+ * slope is undefined in that case).
+ */
+ZHIVO_FUNC(ml_trendline_calculate) {
+    if(args.size() != 2)
+        throw std::runtime_error(
+            "Expecting 2 argument, got " +
+            std::to_string(args.size()) +
+            "."
+        );
+
+    DynamicObject xObj = args.at(0),
+        yObj = args.at(1);
+
+    if(!xObj.isArray() || !yObj.isArray())
+        throw std::runtime_error("Parameter x and y must be both number array");
+
+    std::vector<DynamicObject> xObjArray = *xObj.getArray();
+    std::vector<DynamicObject> yObjArray = *yObj.getArray();
+
+    if(xObjArray.size() != yObjArray.size())
+        throw std::runtime_error("Data set size of x and y did not match");
+
+    if(xObjArray.empty())
+        throw std::runtime_error("Data set of x and y must not be empty");
+
+    // Convert (and validate) once so the hot loop works on plain doubles.
+    std::vector<double> xValues = arrayToDoubleVector(xObjArray);
+    std::vector<double> yValues = arrayToDoubleVector(yObjArray);
+    size_t dataSize = xValues.size();
+
+    double x = calculateMean(xValues),
+        y = calculateMean(yValues),
+        numerator = 0.0,
+        denominator = 0.0;
+
+    // Both accumulators are shared across threads, so they must be
+    // declared as reductions to avoid a data race.
+    #pragma omp parallel for reduction(+:numerator, denominator)
+    for(size_t i = 0; i < dataSize; i++) {
+        double xDiff = xValues[i] - x;
+
+        numerator += xDiff * (yValues[i] - y);
+        denominator += xDiff * xDiff;
+    }
+
+    if(denominator == 0.0)
+        throw std::runtime_error("Cannot calculate trend line when all x values are identical");
+
+    double slope = numerator / denominator;
+    std::vector<DynamicObject> returnValues;
+
+    returnValues.emplace_back(DynamicObject(slope));
+    returnValues.emplace_back(DynamicObject(y - slope * x));
+
+    return DynamicObject(std::make_shared<std::vector<DynamicObject>>(
+        returnValues
+    ));
+}
+
+/*
+ * ml_trendline_calculateRmse(x, y, model)
+ *
+ * Returns the root-mean-square error of the model's predictions
+ * for x against the observed values in y.
+ *
+ * Throws std::runtime_error when the argument count is wrong, when
+ * the model is invalid, when x or y is not an array of numbers,
+ * when their sizes differ, or when the data set is empty.
+ */
+ZHIVO_FUNC(ml_trendline_calculateRmse) {
+    if(args.size() != 3)
+        throw std::runtime_error(
+            "Expecting 3 argument, got " +
+            std::to_string(args.size()) +
+            "."
+        );
+
+    DynamicObject xObj = args.at(0),
+        yObj = args.at(1),
+        model = args.at(2);
+
+    double slope = 0.0,
+        intercept = 0.0;
+    readLinearModel(model, slope, intercept);
+
+    if(!xObj.isArray() || !yObj.isArray())
+        throw std::runtime_error("Parameter x and y must be both number array");
+
+    std::vector<DynamicObject> xObjArray = *xObj.getArray();
+    std::vector<DynamicObject> yObjArray = *yObj.getArray();
+
+    if(xObjArray.size() != yObjArray.size())
+        throw std::runtime_error("Data set size of x and y did not match");
+
+    if(xObjArray.empty())
+        throw std::runtime_error("Data set of x and y must not be empty");
+
+    // Validate up front so nothing can throw inside the parallel loop.
+    std::vector<double> xValues = arrayToDoubleVector(xObjArray);
+    std::vector<double> yValues = arrayToDoubleVector(yObjArray);
+
+    double sumSquaredErrs = 0.0;
+    size_t paramSize = xValues.size();
+
+    #pragma omp parallel for reduction(+:sumSquaredErrs)
+    for(size_t i = 0; i < paramSize; i++) {
+        // Same formula as ml_trendline_predict, without rebuilding
+        // an argument list for every sample.
+        double yPred = slope * xValues[i] + intercept;
+        double error = yValues[i] - yPred;
+
+        sumSquaredErrs += error * error;
+    }
+
+    return DynamicObject(std::sqrt(sumSquaredErrs / paramSize));
+}
+
+/*
+ * ml_trendline_predict(model, value)
+ *
+ * Evaluates the model at the given x value and returns
+ * slope * value + intercept.
+ *
+ * Throws std::runtime_error when the argument count is wrong, when
+ * the model is invalid, or when the value is not a number.
+ */
+ZHIVO_FUNC(ml_trendline_predict) {
+    if(args.size() != 2)
+        throw std::runtime_error(
+            "Expecting 2 argument, got " +
+            std::to_string(args.size()) +
+            "."
+        );
+
+    DynamicObject model = args.at(0),
+        value = args.at(1);
+
+    double slope = 0.0,
+        intercept = 0.0;
+    readLinearModel(model, slope, intercept);
+
+    if(!value.isNumber())
+        throw std::runtime_error("Cannot predict linear regression value for non-numbers");
+
+    return DynamicObject(
+        slope *
+        value.getNumber() +
+        intercept
+    );
+}
diff --git a/lib/zhvlib/ML.hpp b/lib/zhvlib/ML.hpp
new file mode 100644
index 0000000..6fe9428
--- /dev/null
+++ b/lib/zhvlib/ML.hpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2024 - Nathanne Isip
+ * This file is part of Zhivo.
+ *
+ * Zhivo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License,
+ * or (at your option) any later version.
+ *
+ * Zhivo is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Zhivo. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef ZHIVO_STDLIB_ML_CC
+#define ZHIVO_STDLIB_ML_CC
+
+#include "ZhivoLibrary.hpp"
+
+ZHIVO_LIB_START
+
+ZHIVO_FUNC(ml_trendline_calculate);
+ZHIVO_FUNC(ml_trendline_calculateRmse);
+ZHIVO_FUNC(ml_trendline_predict);
+
+ZHIVO_LIB_END
+
+#endif