diff --git a/CHANGELOG.md b/CHANGELOG.md index a7bbe67..137a4d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # Changelog +## [0.1.3] - Unreleased + ## [0.1.2] - 2024-08-19 - Encode granular features with base64 - Refactor result format to generic ISCC data model diff --git a/iscc_sct/demo.py b/iscc_sct/demo.py index 99b3610..0aec86b 100644 --- a/iscc_sct/demo.py +++ b/iscc_sct/demo.py @@ -443,9 +443,8 @@ def reset_all(): ) with gr.Row(variant="panel"): - with gr.Column(variant="panel"): - gr.Markdown( - """ + gr.Markdown( + """ ## Understanding ISCC Semantic Text-Codes ### What is an ISCC Semantic Text-Code? @@ -476,7 +475,11 @@ def reset_all(): The similarity shown is calculated by comparing the ISCC codes, not the original texts. This allows for efficient and privacy-preserving comparisons, as only the codes need to be shared or stored. +""" + ) + gr.Markdown( + """ ### Why is this useful? - **Content creators**: Find similar content across languages. - **Researchers**: Quickly compare documents or find related texts in different languages. @@ -490,20 +493,30 @@ def reset_all(): The "Explore Details & Advanced Options" section provides additional tools and information: 1. **ISCC Bit-Length**: Adjust the precision of the ISCC code. Higher values provide more detailed - comparisons but may be more sensitive to minor differences. +comparisons but may be more sensitive to minor differences. 2. **Max Tokens**: Set the maximum number of tokens per chunk. This affects how the text is split - for processing. +for processing. 3. **Chunked Text**: View how each input text is divided into chunks for processing. Each chunk is - color-coded and labeled with its size and simprint (a similarity preserving fingerprint). +color-coded and labeled with its size and simprint (a similarity preserving fingerprint). 4. **Granular Matches**: See a detailed comparison of individual chunks between Text A and Text B. 
- This table shows which specific parts of the texts are most similar, along with their approximate - cosine similarity (scaled -100% to +100%). +This table shows which specific parts of the texts are most similar (above 80%), along with their +approximate cosine similarity (scaled -100% to +100%). + +For more information about the **ISCC** see: +- https://github.com/iscc +- https://iscc.codes +- https://iscc.io +- [ISO 24138:2024](https://www.iso.org/standard/77899.html) """ - ) - + ) + with gr.Row(): + gr.Markdown( + f"iscc-sct v{sct.__version__} | Source Code: https://github.com/iscc/iscc-sct", + elem_classes="footer", + ) if __name__ == "__main__": # pragma: no cover demo.launch() diff --git a/poetry.lock b/poetry.lock index 6856d4e..f586e72 100644 --- a/poetry.lock +++ b/poetry.lock @@ -755,13 +755,13 @@ files = [ [[package]] name = "importlib-metadata" -version = "8.2.0" +version = "8.3.0" description = "Read metadata from Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "importlib_metadata-8.2.0-py3-none-any.whl", hash = "sha256:11901fa0c2f97919b288679932bb64febaeacf289d18ac84dd68cb2e74213369"}, - {file = "importlib_metadata-8.2.0.tar.gz", hash = "sha256:72e8d4399996132204f9a16dcc751af254a48f8d1b20b9ff0f98d4a8f901e73d"}, + {file = "importlib_metadata-8.3.0-py3-none-any.whl", hash = "sha256:42817a4a0be5845d22c6e212db66a94ad261e2318d80b3e0d363894a79df2b67"}, + {file = "importlib_metadata-8.3.0.tar.gz", hash = "sha256:9c8fa6e8ea0f9516ad5c8db9246a731c948193c7754d3babb0114a05b27dd364"}, ] [package.dependencies] diff --git a/tests/test_iscc_sct.py b/tests/test_iscc_sct.py index 3694bc2..3eca767 100644 --- a/tests/test_iscc_sct.py +++ b/tests/test_iscc_sct.py @@ -31,7 +31,7 @@ def test_version(): - assert sct.__version__ == "0.1.2" + assert sct.__version__ == "0.1.3" def test_code_text_semantic_default():