diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a50e65..4e87589 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,18 @@ All notable changes to this project will be documented in this file. +## [2.1.2] - 2024-11-02 +### Fixed +- Improved semantic chunking accuracy with stricter similarity thresholds +- Enhanced logging in similarity calculations for better debugging +- Fixed chunk creation to better respect semantic boundaries + +### Updated +- Default similarity threshold increased to 0.5 +- Default dynamic threshold bounds adjusted (0.4 - 0.8) +- Improved chunk rebalancing logic with similarity checks +- Updated logging for similarity scores between sentences + ## [2.1.1] - 2024-11-01 ### Updated - Updated example scripts in README. diff --git a/package-lock.json b/package-lock.json index 9feeda6..51df09f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "semantic-chunking", - "version": "2.1.1", + "version": "2.1.2", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "semantic-chunking", - "version": "2.1.1", + "version": "2.1.2", "license": "ISC", "dependencies": { "@xenova/transformers": "^2.17.2", @@ -83,11 +83,11 @@ "integrity": "sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA==" }, "node_modules/@types/node": { - "version": "22.7.5", - "resolved": "https://registry.npmjs.org/@types/node/-/node-22.7.5.tgz", - "integrity": "sha512-jML7s2NAzMWc//QSJ1a3prpk78cOPchGvXJsC3C6R6PSMoooztvRVQEz89gmBTBY1SPMaqo5teB4uNHPdetShQ==", + "version": "22.8.7", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.8.7.tgz", + "integrity": "sha512-LidcG+2UeYIWcMuMUpBKOnryBWG/rnmOHQR5apjn8myTQcx3rinFRn7DcIFhMnS0PPFSC6OafdIKEad0lj6U0Q==", "dependencies": { - "undici-types": "~6.19.2" + "undici-types": "~6.19.8" } }, "node_modules/@xenova/transformers": { @@ -149,12 +149,11 @@ } }, "node_modules/bare-stream": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.3.0.tgz", - "integrity": "sha512-pVRWciewGUeCyKEuRxwv06M079r+fRjAQjBEK2P6OYGrO43O+Z0LrPZZEjlc4mB6C2RpZ9AxJ1s7NLEtOHO6eA==", + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.3.2.tgz", + "integrity": "sha512-EFZHSIBkDgSHIwj2l2QZfP4U5OcD4xFAOwhSb/vlr9PIqyGJGvB/nfClJbcnh3EY4jtPE4zsb5ztae96bVF79A==", "optional": true, "dependencies": { - "b4a": "^1.6.6", "streamx": "^2.20.0" } }, @@ -421,9 +420,9 @@ "integrity": "sha512-ONmRUqK7zj7DWX0D9ADe03wbwOBZxNAfF20PlGfCWQcD3+/MakShIHrMqx9YwPTfxDdF1zLeL+RGZiR9kGMLdg==" }, "node_modules/node-abi": { - "version": "3.68.0", - "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.68.0.tgz", - "integrity": "sha512-7vbj10trelExNjFSBm5kTvZXXa7pZyKWx9RCKIyqe6I9Ev3IzGpQoqBP3a+cOdxY+pWj6VkP28n/2wWysBHD/A==", + "version": "3.71.0", + "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.71.0.tgz", + "integrity": "sha512-SZ40vRiy/+wRTf21hxkkEjPJZpARzUMVcJoQse2EF8qkUWbbO2z7vd5oA/H6bVH6SZQ5STGcu0KRDS7biNRfxw==", "dependencies": { "semver": "^7.3.5" }, @@ -791,12 +790,9 @@ } }, "node_modules/text-decoder": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.0.tgz", - "integrity": "sha512-n1yg1mOj9DNpk3NeZOx7T6jchTbyJS3i3cucbNN6FcdPriMZx7NsgrGpWWdWZZGxD7ES1XB+3uoqHMgOKaN+fg==", - "dependencies": { - "b4a": "^1.6.4" - } + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.1.tgz", + "integrity": "sha512-x9v3H/lTKIJKQQe7RPQkLfKAnc9lUTkWDypIQgTzPJAq+5/GCDHonmshfvlsNSj58yyshbIJJDLmU15qNERrXQ==" }, "node_modules/tunnel-agent": { "version": "0.6.0", diff --git a/package.json b/package.json index 2d3a7d6..fa053a6 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "semantic-chunking", - "version": "2.1.1", + "version": "2.1.2", "description": "Semantically create chunks from large texts. Useful for workflows involving large language models (LLMs).", "repository": { "type": "git",