From 17ec7f759f09a7b3e8ef7e14a8c8f26e711b0d4c Mon Sep 17 00:00:00 2001 From: Mehmet Can Ay Date: Thu, 22 Aug 2024 15:55:23 +0200 Subject: [PATCH 1/5] update: datastew version --- api/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/requirements.txt b/api/requirements.txt index 23e4b01..dcf565a 100644 --- a/api/requirements.txt +++ b/api/requirements.txt @@ -3,7 +3,7 @@ httpx~=0.27.0 uvicorn~=0.30.1 fastapi~=0.111.0 starlette~=0.37.2 -datastew~=0.3.3 +datastew~=0.3.4 numpy~=1.25.2 pandas~=2.1.0 requests~=2.31.0 From 9787834b3ceb6cb808f60c89b1cf8339c86b76ba Mon Sep 17 00:00:00 2001 From: Mehmet Can Ay Date: Thu, 22 Aug 2024 15:56:07 +0200 Subject: [PATCH 2/5] refactor: implement new weaviate repository --- api/routes.py | 90 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 65 insertions(+), 25 deletions(-) diff --git a/api/routes.py b/api/routes.py index a8227e3..416d20c 100644 --- a/api/routes.py +++ b/api/routes.py @@ -107,13 +107,19 @@ async def get_all_terminologies(): @app.put("/terminologies/{id}", tags=["terminologies"]) -async def create_or_update_terminology(id: str, name: str): +async def create_terminology(id: str, name: str): try: terminology = Terminology(name=name, id=id) repository.store(terminology) - return {"message": f"Terminology {id} created or updated successfully"} + return {"message": f"Terminology {id} created successfully"} except Exception as e: - raise HTTPException(status_code=400, detail=f"Failed to create or update terminology: {str(e)}") + raise HTTPException(status_code=400, detail=f"Failed to create terminology: {str(e)}") + + +@app.get("/models", tags=["models"]) +async def get_all_models(): + sentence_embedders = repository.get_all_sentence_embedders() + return sentence_embedders @app.get("/concepts", tags=["concepts"]) @@ -123,17 +129,26 @@ async def get_all_concepts(): @app.put("/concepts/{id}", tags=["concepts"]) -async def create_or_update_concept(id: str, terminology_id: str, name: str): +async def create_concept(concept_id: str, concept_name: str, terminology_name: str): try: - terminology = repository.session.query(Terminology).filter(Terminology.id == terminology_id).first() - if not terminology: - raise HTTPException(status_code=404, detail=f"Terminology with id {terminology_id} not found") - - concept = Concept(terminology=terminology, name=name, id=id) + if not repository._terminology_exists(terminology_name): + raise HTTPException(status_code=404, detail=f"Terminology {terminology_name} not found") + result = repository.client.query.get( + "Terminology", + ["name", "_additional { id }"] + ).with_where({ + "path": "name", + "operator": "Equal", + "valueText": terminology_name + }).do() + terminology_data = result["data"]["Get"]["Terminology"][0] + terminology_id = terminology_data["_additional"]["id"] + terminology = Terminology(name=terminology_name, id=terminology_id) + concept = Concept(terminology=terminology, pref_label=concept_name, concept_identifier=concept_id) repository.store(concept) - return {"message": f"Concept {id} created or updated successfully"} + return {"message": f"Concept {concept_id} created successfully"} except Exception as e: - raise HTTPException(status_code=400, detail=f"Failed to create or update concept: {str(e)}") + raise HTTPException(status_code=400, detail=f"Failed to create concept: {str(e)}") @app.get("/mappings", tags=["mappings"]) @@ -143,33 +158,58 @@ async def get_all_mappings(): @app.put("/concepts/{id}/mappings", tags=["concepts", "mappings"]) -async def create_concept_and_attach_mapping(id: str, terminology_id: str, concept_name: str, text: str): +async def create_concept_and_attach_mapping(concept_id: str, concept_name: str, terminology_name, text: str): try: - terminology = repository.session.query(Terminology).filter(Terminology.id == terminology_id).first() - if not terminology: - raise HTTPException(status_code=404, detail=f"Terminology with id {terminology_id} not found") - concept = Concept(terminology=terminology, name=concept_name, id=id) + if not repository._terminology_exists(terminology_name): + raise HTTPException(status_code=404, detail=f"Terminology {terminology_name} not found") + result = repository.client.query.get( + "Terminology", + ["name", "_additional { id }"] + ).with_where({ + "path": "name", + "operator": "Equal", + "valueText": terminology_name + }).do() + terminology_data = result["data"]["Get"]["Terminology"][0] + terminology_id = terminology_data["_additional"]["id"] + terminology = Terminology(name=terminology_name, id=terminology_id) + concept = Concept(terminology=terminology, pref_label=concept_name, concept_identifier=concept_id) repository.store(concept) embedding = embedding_model.get_embedding(text) - mapping = Mapping(concept=concept, text=text, embedding=embedding) + model_name = embedding_model.get_model_name() + mapping = Mapping(concept=concept, text=text, embedding=embedding, sentence_embedder=model_name) repository.store(mapping) - return {"message": f"Concept {id} created or updated successfully"} + return {"message": f"Concept {concept_id} created successfully"} except Exception as e: - raise HTTPException(status_code=400, detail=f"Failed to create or update concept: {str(e)}") + raise HTTPException(status_code=400, detail=f"Failed to create concept: {str(e)}") @app.put("/mappings/", tags=["mappings"]) -async def create_or_update_mapping(concept_id: str, text: str): +async def create_mapping(concept_id: str, text: str): try: - concept = repository.session.query(Concept).filter(Concept.id == concept_id).first() - if not concept: + if not repository._concept_exists(concept_id=concept_id): raise HTTPException(status_code=404, detail=f"Concept with id {concept_id} not found") + result = repository.client.query.get( + "Concept", + ["conceptID", "prefLabel", "hasTerminology { ... on Terminology { _additional { id } name } }"] + ).with_where({ + "path": "conceptID", + "operator": "Equal", + "valueText": concept_id + }).do() + terminology_data = result["data"]["get"]["Concept"][0]["hasTerminology"] + terminology_name = terminology_data["name"] + terminology_id = terminology_data["_additional"]["id"] + terminology = Terminology(terminology_name, terminology_id) + concept_name = result["data"]["Get"]["Concept"][0]["prefLabel"] + concept = Concept(terminology=terminology, pref_label=concept_name, concept_identifier=concept_id) embedding = embedding_model.get_embedding(text) - mapping = Mapping(concept=concept, text=text, embedding=embedding) + model_name = embedding_model.get_model_name() + mapping = Mapping(concept=concept, text=text, embedding=embedding, sentence_embedder=model_name) repository.store(mapping) - return {"message": f"Mapping created or updated successfully"} + return {"message": "Mapping created successfully"} except Exception as e: - raise HTTPException(status_code=400, detail=f"Failed to create or update mapping: {str(e)}") + raise HTTPException(status_code=400, detail=f"Failed to create mapping: {str(e)}") @app.post("/mappings", tags=["mappings"]) From 333b29639b6e65b77ee396c7160bd4f2bf536c51 Mon Sep 17 00:00:00 2001 From: Mehmet Can Ay Date: Thu, 22 Aug 2024 19:06:28 +0200 Subject: [PATCH 3/5] refactor: fix endpoint ids --- api/routes.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/api/routes.py b/api/routes.py index 416d20c..e61e82d 100644 --- a/api/routes.py +++ b/api/routes.py @@ -129,7 +129,7 @@ async def get_all_concepts(): @app.put("/concepts/{id}", tags=["concepts"]) -async def create_concept(concept_id: str, concept_name: str, terminology_name: str): +async def create_concept(id: str, concept_name: str, terminology_name: str): try: if not repository._terminology_exists(terminology_name): raise HTTPException(status_code=404, detail=f"Terminology {terminology_name} not found") @@ -144,9 +144,9 @@ async def create_concept(concept_id: str, concept_name: str, terminology_name: s terminology_data = result["data"]["Get"]["Terminology"][0] terminology_id = terminology_data["_additional"]["id"] terminology = Terminology(name=terminology_name, id=terminology_id) - concept = Concept(terminology=terminology, pref_label=concept_name, concept_identifier=concept_id) + concept = Concept(terminology=terminology, pref_label=concept_name, concept_identifier=id) repository.store(concept) - return {"message": f"Concept {concept_id} created successfully"} + return {"message": f"Concept {id} created successfully"} except Exception as e: raise HTTPException(status_code=400, detail=f"Failed to create concept: {str(e)}") @@ -158,7 +158,7 @@ async def get_all_mappings(): @app.put("/concepts/{id}/mappings", tags=["concepts", "mappings"]) -async def create_concept_and_attach_mapping(concept_id: str, concept_name: str, terminology_name, text: str): +async def create_concept_and_attach_mapping(id: str, concept_name: str, terminology_name, text: str): try: if not repository._terminology_exists(terminology_name): raise HTTPException(status_code=404, detail=f"Terminology {terminology_name} not found") @@ -173,13 +173,13 @@ async def create_concept_and_attach_mapping(concept_id: str, concept_name: str, terminology_data = result["data"]["Get"]["Terminology"][0] terminology_id = terminology_data["_additional"]["id"] terminology = Terminology(name=terminology_name, id=terminology_id) - concept = Concept(terminology=terminology, pref_label=concept_name, concept_identifier=concept_id) + concept = Concept(terminology=terminology, pref_label=concept_name, concept_identifier=id) repository.store(concept) embedding = embedding_model.get_embedding(text) model_name = embedding_model.get_model_name() mapping = Mapping(concept=concept, text=text, embedding=embedding, sentence_embedder=model_name) repository.store(mapping) - return {"message": f"Concept {concept_id} created successfully"} + return {"message": f"Concept {id} created successfully"} except Exception as e: raise HTTPException(status_code=400, detail=f"Failed to create concept: {str(e)}") From 4731bc9e5e741edbb8f86096dcd8b8dde5478c26 Mon Sep 17 00:00:00 2001 From: Mehmet Can Ay Date: Mon, 26 Aug 2024 16:55:49 +0200 Subject: [PATCH 4/5] refactor: update datastew version --- api/requirements.txt | 2 +- api/routes.py | 52 ++++++-------------------------------------- 2 files changed, 8 insertions(+), 46 deletions(-) diff --git a/api/requirements.txt b/api/requirements.txt index dcf565a..e76cbf5 100644 --- a/api/requirements.txt +++ b/api/requirements.txt @@ -3,7 +3,7 @@ httpx~=0.27.0 uvicorn~=0.30.1 fastapi~=0.111.0 starlette~=0.37.2 -datastew~=0.3.4 +datastew~=0.3.5 numpy~=1.25.2 pandas~=2.1.0 requests~=2.31.0 diff --git a/api/routes.py b/api/routes.py index e61e82d..c925727 100644 --- a/api/routes.py +++ b/api/routes.py @@ -131,19 +131,7 @@ async def get_all_concepts(): @app.put("/concepts/{id}", tags=["concepts"]) async def create_concept(id: str, concept_name: str, terminology_name: str): try: - if not repository._terminology_exists(terminology_name): - raise HTTPException(status_code=404, detail=f"Terminology {terminology_name} not found") - result = repository.client.query.get( - "Terminology", - ["name", "_additional { id }"] - ).with_where({ - "path": "name", - "operator": "Equal", - "valueText": terminology_name - }).do() - terminology_data = result["data"]["Get"]["Terminology"][0] - terminology_id = terminology_data["_additional"]["id"] - terminology = Terminology(name=terminology_name, id=terminology_id) + terminology = repository.get_terminology(terminology_name) concept = Concept(terminology=terminology, pref_label=concept_name, concept_identifier=id) repository.store(concept) return {"message": f"Concept {id} created successfully"} @@ -160,19 +148,7 @@ async def get_all_mappings(): @app.put("/concepts/{id}/mappings", tags=["concepts", "mappings"]) async def create_concept_and_attach_mapping(id: str, concept_name: str, terminology_name, text: str): try: - if not repository._terminology_exists(terminology_name): - raise HTTPException(status_code=404, detail=f"Terminology {terminology_name} not found") - result = repository.client.query.get( - "Terminology", - ["name", "_additional { id }"] - ).with_where({ - "path": "name", - "operator": "Equal", - "valueText": terminology_name - }).do() - terminology_data = result["data"]["Get"]["Terminology"][0] - terminology_id = terminology_data["_additional"]["id"] - terminology = Terminology(name=terminology_name, id=terminology_id) + terminology = repository.get_terminology(terminology_name) concept = Concept(terminology=terminology, pref_label=concept_name, concept_identifier=id) repository.store(concept) embedding = embedding_model.get_embedding(text) @@ -187,22 +163,7 @@ async def create_concept_and_attach_mapping(id: str, concept_name: str, termino @app.put("/mappings/", tags=["mappings"]) async def create_mapping(concept_id: str, text: str): try: - if not repository._concept_exists(concept_id=concept_id): - raise HTTPException(status_code=404, detail=f"Concept with id {concept_id} not found") - result = repository.client.query.get( - "Concept", - ["conceptID", "prefLabel", "hasTerminology { ... on Terminology { _additional { id } name } }"] - ).with_where({ - "path": "conceptID", - "operator": "Equal", - "valueText": concept_id - }).do() - terminology_data = result["data"]["get"]["Concept"][0]["hasTerminology"] - terminology_name = terminology_data["name"] - terminology_id = terminology_data["_additional"]["id"] - terminology = Terminology(terminology_name, terminology_id) - concept_name = result["data"]["Get"]["Concept"][0]["prefLabel"] - concept = Concept(terminology=terminology, pref_label=concept_name, concept_identifier=concept_id) + concept = repository.get_concept(concept_id) embedding = embedding_model.get_embedding(text) model_name = embedding_model.get_model_name() mapping = Mapping(concept=concept, text=text, embedding=embedding, sentence_embedder=model_name) @@ -213,9 +174,10 @@ async def create_mapping(concept_id: str, text: str): @app.post("/mappings", tags=["mappings"]) -async def get_closest_mappings_for_text(text: str, limit: int = 5): +async def get_closest_mappings_for_text(text: str, terminology_name: str = "SNOMED CT", + sentence_embedder: str = "sentence-transformers/all-mpnet-base-v2", limit: int = 5): embedding = embedding_model.get_embedding(text).tolist() - closest_mappings = repository.get_closest_mappings_with_similarities(embedding, limit) + closest_mappings = repository.get_terminology_and_model_specific_closest_mappings(embedding, terminology_name, sentence_embedder, limit) mappings = [] for mapping, similarity in closest_mappings: concept = mapping.concept @@ -300,4 +262,4 @@ async def import_snomed_ct(background_tasks: BackgroundTasks): if __name__ == "__main__": - uvicorn.run(app, host="0.0.0.0", port=5000) + uvicorn.run(app, host="0.0.0.0", port=5001) From a84e345d9b40b15a5edda7bbdbb3140846fa1360 Mon Sep 17 00:00:00 2001 From: TimAdams84 Date: Wed, 28 Aug 2024 11:22:37 +0200 Subject: [PATCH 5/5] Fix port --- api/routes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/routes.py b/api/routes.py index c925727..64d52f3 100644 --- a/api/routes.py +++ b/api/routes.py @@ -262,4 +262,4 @@ async def import_snomed_ct(background_tasks: BackgroundTasks): if __name__ == "__main__": - uvicorn.run(app, host="0.0.0.0", port=5001) + uvicorn.run(app, host="0.0.0.0", port=5000)