From ca9b12799ea8e8949eebbbfe679f0b5b6f889ca3 Mon Sep 17 00:00:00 2001
From: Erhun Giray TUNCAY <48091473+giraygi@users.noreply.github.com>
Date: Mon, 26 Aug 2024 19:43:42 +0200
Subject: [PATCH] implemented basic backup functionality for #11

---
 ansible/group_vars/all.yml          |  4 ++
 ansible/playbook_backup.yml         |  4 ++
 ansible/roles/backup/tasks/main.yml | 80 +++++++++++++++++++++++++++++
 3 files changed, 88 insertions(+)
 create mode 100644 ansible/playbook_backup.yml
 create mode 100644 ansible/roles/backup/tasks/main.yml

diff --git a/ansible/group_vars/all.yml b/ansible/group_vars/all.yml
index 47377d7..d304893 100644
--- a/ansible/group_vars/all.yml
+++ b/ansible/group_vars/all.yml
@@ -7,6 +7,7 @@ ols4_config_file: ./dataload/configs/efo.json # config file that includes the me
 delimited_ontology_prefixes: # ontology prefixes delimited by commas. This variable can be used in update and removaƶ of ontologies
 ols4_default_swagger_url: https://www.ebi.ac.uk/ols4
 ols4_swagger_url:
+ols4_solr_url: http://localhost:8983/solr
 ingest_batch_size: 1000
 ingest_pool_size: 20
 ingest_attempts: 5
@@ -15,6 +16,9 @@ frontend_dockerfile:
 frontend_envfile:
 dataload_dockerfile:
 alternative_dockercomposefile:
+neo4j_data_volume: ols4_ols4-neo4j-data
+neo4j_container: ols4_ols4-neo4j_1
+solr_container: ols4_ols4-solr_1
 docker_user: '{{ ansible_user }}'
 
 
diff --git a/ansible/playbook_backup.yml b/ansible/playbook_backup.yml
new file mode 100644
index 0000000..ba89817
--- /dev/null
+++ b/ansible/playbook_backup.yml
@@ -0,0 +1,4 @@
+---
+- hosts: all
+  roles:
+    - backup
diff --git a/ansible/roles/backup/tasks/main.yml b/ansible/roles/backup/tasks/main.yml
new file mode 100644
index 0000000..cf10364
--- /dev/null
+++ b/ansible/roles/backup/tasks/main.yml
@@ -0,0 +1,80 @@
+---
+# Backs up OLS4: an offline Neo4J database dump (container stopped first),
+# then CSV exports of both Solr cores (ols4_autocomplete, ols4_entities)
+# fetched in 10000-row pages and concatenated into one file per core.
+
+- name: Stop Neo4J container
+  community.docker.docker_container:
+    name: '{{ neo4j_container }}'
+    state: stopped
+
+- name: Dump Neo4J database with timestamp in a new container
+  # A throwaway container shares the data volume; Neo4J must be stopped
+  # (task above) for the offline dump to be consistent.
+  ansible.builtin.shell: >-
+    docker run --name dump --entrypoint="/bin/bash"
+    -v {{ neo4j_data_volume }}:/var/lib/neo4j/data neo4j
+    -c "./bin/neo4j-admin database dump neo4j --to-path=/var/lib/neo4j/data
+    && mv /var/lib/neo4j/data/neo4j.dump
+    /var/lib/neo4j/data/tsdb$(date "+%Y.%m.%d-%H.%M.%S").dump"
+  args:
+    executable: /bin/bash
+
+- name: Remove dump container
+  community.docker.docker_container:
+    name: dump
+    state: absent
+
+- name: Start Neo4J container
+  community.docker.docker_container:
+    name: '{{ neo4j_container }}'
+    state: started
+
+- name: Fetch total number of autocomplete rows
+  ansible.builtin.uri:
+    url: "{{ ols4_solr_url }}/ols4_autocomplete/select?q=*:*&rows=0&wt=json"
+    return_content: true
+  register: autocomplete_count
+
+- name: Fetch autocomplete index in batches of 10000 rows
+  # csv.header is requested only for the first page so the concatenated
+  # file below ends up with exactly one header row.
+  ansible.builtin.uri:
+    url: "{{ ols4_solr_url }}/ols4_autocomplete/select?q=*:*&wt=csv&rows=10000&start={{ item }}&csv.header={{ 'true' if item == 0 else 'false' }}"
+    return_content: true
+  register: autocomplete_batches
+  loop: "{{ range(0, autocomplete_count.json.response.numFound | int, 10000) | list }}"
+
+- name: Save autocomplete batches to part files
+  # Zero-padded part numbers keep the shell glob below in numeric order
+  # (part10 would otherwise sort before part2).
+  ansible.builtin.copy:
+    content: "{{ item.content }}"
+    dest: "solr_autocomplete_dump_part{{ '%05d' | format(item.item // 10000 + 1) }}.csv"
+  loop: "{{ autocomplete_batches.results | default([]) }}"
+
+- name: Combine all autocomplete parts into a single file
+  ansible.builtin.shell: cat solr_autocomplete_dump_part*.csv > solr_autocomplete_dump.csv
+  when: autocomplete_batches.results | default([]) | length > 0
+
+- name: Fetch total number of entity rows
+  ansible.builtin.uri:
+    url: "{{ ols4_solr_url }}/ols4_entities/select?q=*:*&rows=0&wt=json"
+    return_content: true
+  register: entities_count
+
+- name: Fetch entities index in batches of 10000 rows
+  ansible.builtin.uri:
+    url: "{{ ols4_solr_url }}/ols4_entities/select?q=*:*&wt=csv&rows=10000&start={{ item }}&csv.header={{ 'true' if item == 0 else 'false' }}"
+    return_content: true
+  register: entities_batches
+  loop: "{{ range(0, entities_count.json.response.numFound | int, 10000) | list }}"
+
+- name: Save entities batches to part files
+  ansible.builtin.copy:
+    content: "{{ item.content }}"
+    dest: "solr_entities_dump_part{{ '%05d' | format(item.item // 10000 + 1) }}.csv"
+  loop: "{{ entities_batches.results | default([]) }}"
+
+- name: Combine all entities parts into a single file
+  ansible.builtin.shell: cat solr_entities_dump_part*.csv > solr_entities_dump.csv
+  when: entities_batches.results | default([]) | length > 0