-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
16e99cb
commit efa2452
Showing
9 changed files
with
977 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -66,6 +66,7 @@ jobs: | |
hm-jax: ${{ steps.filter.outputs.hm-jax }} | ||
hm-kubeflow-calculate: ${{ steps.filter.outputs.hm-kubeflow-calculate }} | ||
hm-kubeflow-classify-mnist: ${{ steps.filter.outputs.hm-kubeflow-classify-mnist }} | ||
hm-lancedb: ${{ steps.filter.outputs.hm-lancedb }} | ||
hm-langchain-chat-pdf: ${{ steps.filter.outputs.hm-langchain-chat-pdf }} | ||
hm-langgraph-chat-pdf: ${{ steps.filter.outputs.hm-langgraph-chat-pdf }} | ||
hm-llama-index-chat-pdf: ${{ steps.filter.outputs.hm-llama-index-chat-pdf }} | ||
|
@@ -267,6 +268,9 @@ jobs: | |
hm-kubeflow-classify-mnist: | ||
- '.github/workflows/test.yml' | ||
- 'machine-learning/hm-kubeflow/pipelines/classify-mnist/**' | ||
hm-lancedb: | ||
- '.github/workflows/test.yml' | ||
- 'data-storage/hm-lancedb/**' | ||
hm-langchain-chat-pdf: | ||
- '.github/workflows/test.yml' | ||
- 'machine-learning/hm-langchain/applications/chat-pdf/**' | ||
|
@@ -1398,6 +1402,39 @@ jobs: | |
with: | ||
directory: data-storage/hm-duckdb/query-protobuf | ||
|
||
lancedb-test: | ||
name: LanceDB | Test | ||
needs: detect-changes | ||
if: ${{ needs.detect-changes.outputs.hm-lancedb == 'true' }} | ||
runs-on: ubuntu-24.04 | ||
environment: test | ||
timeout-minutes: 10 | ||
steps: | ||
- name: Checkout | ||
uses: actions/[email protected] | ||
- name: Install uv | ||
uses: astral-sh/[email protected] | ||
with: | ||
version: 0.5.11 | ||
enable-cache: true | ||
cache-dependency-glob: data-storage/hm-lancedb/uv.lock | ||
- name: Set up Python | ||
uses: actions/[email protected] | ||
with: | ||
python-version-file: data-storage/hm-lancedb/pyproject.toml | ||
- name: Install dependencies | ||
working-directory: data-storage/hm-lancedb | ||
run: | | ||
uv sync --dev | ||
- name: Test | ||
working-directory: data-storage/hm-lancedb | ||
run: | | ||
uv run poe test-coverage | ||
- name: Upload coverage to Codecov | ||
uses: codecov/[email protected] | ||
with: | ||
directory: data-storage/hm-lancedb | ||
|
||
protobuf-test: | ||
name: Protobuf | Test | ||
needs: detect-changes | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
uv-install-python:: | ||
uv python install | ||
uv-update-lock-file: | ||
uv lock | ||
uv-install-dependencies: | ||
uv sync --dev | ||
|
||
uv-run-dev: | ||
uv run poe dev | ||
uv-run-test: | ||
uv run poe test | ||
uv-run-test-coverage: | ||
uv run poe test-coverage |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
[project] | ||
name = "hm-lancedb" | ||
version = "1.0.0" | ||
requires-python = "~=3.12.0" | ||
dependencies = [ | ||
"lancedb==0.17.0", | ||
"polars==1.18.0", | ||
"sentence-transformers==3.3.1", | ||
] | ||
|
||
[dependency-groups] | ||
dev = [ | ||
"poethepoet==0.31.1", | ||
"pytest==8.3.4", | ||
"pytest-cov==6.0.0", | ||
] | ||
|
||
[tool.uv] | ||
package = false | ||
|
||
[tool.poe.tasks] | ||
dev = "python src/main.py" | ||
test = "pytest --verbose --verbose" | ||
test-coverage = "pytest --cov=. --cov-report=xml" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
id,quote,author | ||
1,"Wubba Lubba Dub Dub!",Rick Sanchez | ||
2,"Nobody exists on purpose. Nobody belongs anywhere. We're all going to die. Come watch TV.",Morty Smith | ||
3,"Sometimes science is more art than science.",Rick Sanchez | ||
4,"I'm not a hero. I'm a high-functioning alcoholic.",Rick Sanchez | ||
5,"Get your shit together, get it all together and put it in a backpack.",Morty Smith |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
class TestDummy:
    """Placeholder suite containing a single trivial arithmetic check."""

    def test_dummy(self):
        """Assert that adding one and one yields two."""
        expected = 2
        actual = 1 + 1
        assert actual == expected
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import logging | ||
|
||
import lancedb | ||
import polars as pl | ||
from lancedb.embeddings import get_registry | ||
from lancedb.pydantic import LanceModel, Vector | ||
|
||
# Resolve the "sentence-transformers" entry in LanceDB's embedding registry,
# then instantiate it with the all-MiniLM-L6-v2 model. `func` is used by the
# table schema to declare its source and vector fields.
registry = get_registry()
_sentence_transformers = registry.get("sentence-transformers")
func = _sentence_transformers.create(name="all-MiniLM-L6-v2")
|
||
|
||
class Quotes(LanceModel):
    """LanceDB table schema for a quoted line and its embedding vector.

    ``line`` is declared as the embedding function's source field and
    ``vector`` as its vector field, so the embedding for ``line`` is produced
    by ``func`` rather than supplied by the caller.
    """

    # Value of the ``name`` column of the ingested data
    # (presumably the speaker -- confirm against the dataset).
    name: str
    # Text that ``func`` embeds.
    line: str = func.SourceField()
    # ``Vector`` is a factory that returns a fixed-size vector type, so it
    # must be called with the embedding dimension; the bare function is not a
    # usable field type.
    vector: Vector(func.ndims()) = func.VectorField()
|
||
|
||
def create_and_populate_table(
    db: lancedb.DBConnection, df: pl.DataFrame
) -> lancedb.table.Table:
    """Create the ``quotes`` table from the ``Quotes`` schema and load ``df``.

    The table is created with ``mode="overwrite"``, so an existing table
    named ``quotes`` in ``db`` is replaced.

    Args:
        db: Open LanceDB connection, as returned by ``lancedb.connect``.
        df: Rows to insert into the new table.

    Returns:
        The newly created table with the rows from ``df`` added.
    """
    table = db.create_table("quotes", schema=Quotes, mode="overwrite")
    table.add(df)
    return table
|
||
|
||
def perform_semantic_search(
    table: lancedb.table.Table, query: str, limit: int = 5
) -> pl.DataFrame:
    """Search ``table`` for the rows that best match ``query``.

    Args:
        table: LanceDB table to search.
        query: Free-text query passed to ``table.search``.
        limit: Maximum number of rows to return. Defaults to 5, the value
            that was previously hard-coded.

    Returns:
        The search results converted to a Polars DataFrame.
    """
    return table.search(query).limit(limit).to_polars()
|
||
|
||
def main():
    """Load the quotes CSV into LanceDB and log one semantic search result.

    Reads the CSV from a fixed URL, writes it to the ``quotes`` table of the
    database at ``~/.lancedb``, runs a fixed query against it, and logs the
    question, the ``line`` value of the first match, and the full result frame.
    """
    source_url = "https://raw.githubusercontent.com/Abhiram970/RickBot/refs/heads/main/Rick_and_Morty.csv"
    question = "What is the meaning of life?"

    quotes = pl.read_csv(source_url)
    connection = lancedb.connect("~/.lancedb")
    quotes_table = create_and_populate_table(connection, quotes)

    matches = perform_semantic_search(quotes_table, question)
    logging.info("Question: %s", question)
    # The first row of the result frame is reported as the answer.
    logging.info("Answer: %s", matches["line"][0])
    logging.info(matches)
|
||
|
||
if __name__ == "__main__":
    # Configure the root logger first so the INFO-level messages emitted by
    # main() are actually shown when the file is run as a script.
    logging.basicConfig(level=logging.INFO)
    main()
Oops, something went wrong.