Skip to content

Commit

Permalink
NiFi & CG db: updated sqlite db annotations schema + ann manager.
Browse files Browse the repository at this point in the history
  • Loading branch information
vladd-bit committed Dec 16, 2024
1 parent c355a71 commit 0dcf0d8
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 12 deletions.
6 changes: 3 additions & 3 deletions nifi/user-scripts/annotation_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def main():
for record in records:
if OPERATION_MODE == "check":
document_id = str(record[DOCUMENT_ID_FIELD_NAME])
query = "SELECT id, elasticsearch_id FROM annotations WHERE elasticsearch_id LIKE '%" + document_id + "%' LIMIT 1"
query = "SELECT id FROM annotations WHERE id LIKE '%" + document_id + "%' LIMIT 1"
result = connect_and_query(query, db_file_path, sqlite_connection=_sqlite_connection_ro, cursor=_cursor, keep_conn_open=True)

if len(result) < 1:
Expand All @@ -92,11 +92,11 @@ def main():
if OPERATION_MODE == "insert":
document_id = str(record["meta." + DOCUMENT_ID_FIELD_NAME])
nlp_id = str(record["nlp.id"])
query = "INSERT OR REPLACE INTO annotations (elasticsearch_id) VALUES (" + '"' + document_id + "_" + nlp_id + '"' + ")"
query = "INSERT OR REPLACE INTO annotations (id) VALUES (" + '"' + document_id + "_" + nlp_id + '"' + ")"
result = connect_and_query(query, db_file_path, sqlite_connection=_sqlite_connection_rw, sql_script_mode=True, cursor=_cursor, keep_conn_open=True)
output_stream.append(record)

if _cursor is not None:
if _cursor is not None:
_cursor.close()
if _sqlite_connection_ro is not None:
_sqlite_connection_ro.close()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
-- Table of documents. Its primary key `id` is the column referenced by
-- annotations.document_id (declared as REFERENCES documents(id)).
-- NOTE(review): the difference between `id` and `document_id` is not visible
-- here; presumably `document_id` is the upstream/source identifier — confirm.
CREATE TABLE IF NOT EXISTS documents (
id VARCHAR PRIMARY KEY,
document_id VARCHAR NOT NULL,
document_text TEXT
);

CREATE TABLE IF NOT EXISTS annotations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
elasticsearch_id VARCHAR NULL,
label VARCHAR(255),
label_id VARCHAR(10),
id VARCHAR PRIMARY KEY,
document_id VARCHAR NULL REFERENCES documents(id),
label VARCHAR,
label_id VARCHAR(100),
source_value VARCHAR,
accuracy DECIMAL,
context_similarity DECIMAL,
star_char INTEGER,
end_char INTEGER,
medcat_info VARCHAR,
tui VARCHAR(20),
cui VARCHAR(20),
tui VARCHAR(30),
cui VARCHAR(30),
icd10 VARCHAR,
ontologies VARCHAR,
snomed VARCHAR,
Expand All @@ -25,16 +24,18 @@ CREATE TABLE IF NOT EXISTS annotations (
model_id_used INTEGER REFERENCES nlp_models NULL
);

CREATE INDEX annotations_elasticsearch_id_index ON annotations (elasticsearch_id);
-- Index for looking up annotations by their parent document.
-- IF NOT EXISTS keeps the script re-runnable, matching the CREATE TABLE
-- IF NOT EXISTS statements; a plain CREATE INDEX fails on the second run.
CREATE INDEX IF NOT EXISTS annotations_document_id_index ON annotations (document_id);

-- Meta-annotation rows (name / value / confidence), each linked to a row in
-- `annotations` through annotation_id.
-- NOTE(review): two annotation_id definitions appear below; presumably they are
-- the removed (INTEGER, nullable) and added (VARCHAR, NOT NULL) sides of the
-- diff. A valid table can contain only one — confirm against the committed file.
CREATE TABLE IF NOT EXISTS meta_annotations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
annotation_id INTEGER REFERENCES annotations NULL,
annotation_id VARCHAR REFERENCES annotations(id) NOT NULL,
"value" VARCHAR,
confidence DECIMAL,
"name" VARCHAR
);

-- Index for looking up meta-annotations by their parent annotation.
-- IF NOT EXISTS keeps the script re-runnable, matching the CREATE TABLE
-- IF NOT EXISTS statements; a plain CREATE INDEX fails on the second run.
CREATE INDEX IF NOT EXISTS meta_annotations_annotation_id_index ON meta_annotations (annotation_id);

CREATE TABLE IF NOT EXISTS nlp_models (
id BIGINT PRIMARY KEY,
"name" VARCHAR NOT NULL,
Expand Down

0 comments on commit 0dcf0d8

Please sign in to comment.