diff --git a/.gitignore b/.gitignore index a8a2b3f7..6a14bad9 100644 --- a/.gitignore +++ b/.gitignore @@ -43,4 +43,6 @@ __pycache__/ poetry.toml build/ -logs/ \ No newline at end of file +logs/ +include/bitnet-lut-kernels.h +include/kernel_config.ini diff --git a/src/ggml-bitnet-lut.cpp b/src/ggml-bitnet-lut.cpp index d6bea16f..d9cefbe3 100644 --- a/src/ggml-bitnet-lut.cpp +++ b/src/ggml-bitnet-lut.cpp @@ -15,9 +15,6 @@ void ggml_bitnet_init(void) { } initialized = true; - // if (wrapper == nullptr) { - // wrapper = new BITNET::BITNETGeMMWrapper(); - // } if (bitnet_tensor_extras == nullptr) { bitnet_tensor_extras = new bitnet_tensor_extra[GGML_BITNET_MAX_NODES]; } @@ -32,12 +29,6 @@ void ggml_bitnet_free(void) { } initialized = false; - // delete wrapper; - // wrapper = nullptr; - for (size_t i = 0; i < bitnet_tensor_extras_index; i++) { - // aligned_free(bitnet_tensor_extras[i].qweights); - // aligned_free(bitnet_tensor_extras[i].scales); - } delete[] bitnet_tensor_extras; bitnet_tensor_extras = nullptr; } @@ -99,9 +90,6 @@ void ggml_bitnet_init(void) { } initialized = true; - // if (wrapper == nullptr) { - // wrapper = new BITNET::BITNETGeMMWrapper(); - // } if (bitnet_tensor_extras == nullptr) { bitnet_tensor_extras = new bitnet_tensor_extra[GGML_BITNET_MAX_NODES]; } @@ -116,12 +104,6 @@ void ggml_bitnet_free(void) { } initialized = false; - // delete wrapper; - // wrapper = nullptr; - for (size_t i = 0; i < bitnet_tensor_extras_index; i++) { - // aligned_free(bitnet_tensor_extras[i].qweights); - // aligned_free(bitnet_tensor_extras[i].scales); - } delete[] bitnet_tensor_extras; bitnet_tensor_extras = nullptr; } diff --git a/src/ggml-bitnet-mad.cpp b/src/ggml-bitnet-mad.cpp index f75e6ca8..eeca82b1 100644 --- a/src/ggml-bitnet-mad.cpp +++ b/src/ggml-bitnet-mad.cpp @@ -85,6 +85,8 @@ size_t quantize_i2_s(const float * src, void * dst, int64_t nrow, int64_t n_per_ float* scale_ptr = (float*)((char*)i2_weight + n / 4); scale_ptr[0] = i2_scale; + free(q8); + // 32B for alignment return nrow * row_size / 4 + 32; }