From 5746257b525ff149f6fbae5f8a17527c114e9643 Mon Sep 17 00:00:00 2001 From: Thane Thomson Date: Thu, 16 Jun 2022 19:53:45 -0400 Subject: [PATCH 1/5] Refactor scripts to facilitate dynamic monitoring This commit is a pretty big overhaul to the current testnet scripts that will allow us to implement use cases that involve nodes being dynamically added to and removed from the network. It does this by swapping out Prometheus for a combination of InfluxDB and Telegraf. The "monitor" server now runs an InfluxDB instance that listens for incoming data from Telegraf agents installed on all of the nodes. The Telegraf agents are configured to poll the Tendermint test app's Prometheus endpoint (collecting all Prometheus metrics) as well as system metrics (CPU, memory, disk usage, etc.). Telegraf regularly pushes these metrics to the monitor's InfluxDB server. The InfluxDB server also provides a convenient web-based UI to explore stored data, with graphical visualization tools similar to what Prometheus provides. This commit also: - simplifies the testnet deployment process, - refactors the Ansible playbooks into roles, making them more reusable across playbooks, - uses Terraform to reliably generate the Ansible hosts file (and delete it automatically once the infrastructure's been destroyed), - refactors the Terraform scripts according to a more standardized layout, - updates the usage instructions in the README. 
Signed-off-by: Thane Thomson --- .gitignore | 3 + Makefile | 54 ++--- README.md | 56 +++-- ansible/Makefile | 5 + ansible/base.yaml | 22 -- ansible/config-deploy.yaml | 19 -- ansible/deploy.yaml | 25 ++ ansible/init-testapp.yaml | 14 -- ansible/prometheus-node-exporter.yaml | 19 -- ansible/prometheus.yaml | 29 --- ansible/remove-testapp-data.yaml | 23 -- ansible/restart-testapp.yaml | 11 - ansible/roles/common/files/iptables-rules.v4 | 17 ++ ansible/roles/common/files/iptables-rules.v6 | 17 ++ ansible/roles/common/tasks/main.yaml | 27 +++ ansible/roles/influxdb/files/config.toml | 4 + ansible/roles/influxdb/tasks/main.yaml | 89 +++++++ ansible/roles/telegraf/tasks/main.yaml | 26 +++ .../roles/telegraf/templates/telegraf.conf.j2 | 218 ++++++++++++++++++ ansible/roles/telegraf/vars/.gitkeep | 0 ansible/roles/tendermint/tasks/main.yaml | 41 ++++ ansible/roles/testapp/tasks/main.yaml | 67 ++++++ .../roles/testapp/templates/run-testapp.sh.j2 | 7 + .../testapp/templates/stop-testapp.sh.j2 | 7 + ansible/start-testapp.yaml | 10 - ansible/stop-testapp.yaml | 10 - .../prometheus-node-exporter.service | 13 -- ansible/templates/prometheus.service.j2 | 17 -- ansible/templates/prometheus.yml.j2 | 21 -- ansible/templates/testappd.service.j2 | 17 -- ansible/update-testapp.yaml | 32 +-- ansible/vars.yaml | 21 ++ script/configgen.sh | 18 +- script/runload.sh | 8 + script/secretsgen.sh | 16 ++ testnet.toml | 96 ++++---- tf/Makefile | 2 + tf/{nodes.tf => droplets.tf} | 24 +- tf/hosts.tftpl | 6 + tf/outputs.tf | 7 + tf/project.tf | 2 +- tf/variables.tf | 17 ++ 42 files changed, 761 insertions(+), 376 deletions(-) create mode 100644 ansible/Makefile delete mode 100644 ansible/base.yaml delete mode 100644 ansible/config-deploy.yaml create mode 100644 ansible/deploy.yaml delete mode 100644 ansible/init-testapp.yaml delete mode 100644 ansible/prometheus-node-exporter.yaml delete mode 100644 ansible/prometheus.yaml delete mode 100644 ansible/remove-testapp-data.yaml delete mode 100644 
ansible/restart-testapp.yaml create mode 100644 ansible/roles/common/files/iptables-rules.v4 create mode 100644 ansible/roles/common/files/iptables-rules.v6 create mode 100644 ansible/roles/common/tasks/main.yaml create mode 100644 ansible/roles/influxdb/files/config.toml create mode 100644 ansible/roles/influxdb/tasks/main.yaml create mode 100644 ansible/roles/telegraf/tasks/main.yaml create mode 100644 ansible/roles/telegraf/templates/telegraf.conf.j2 create mode 100644 ansible/roles/telegraf/vars/.gitkeep create mode 100644 ansible/roles/tendermint/tasks/main.yaml create mode 100644 ansible/roles/testapp/tasks/main.yaml create mode 100644 ansible/roles/testapp/templates/run-testapp.sh.j2 create mode 100644 ansible/roles/testapp/templates/stop-testapp.sh.j2 delete mode 100644 ansible/start-testapp.yaml delete mode 100644 ansible/stop-testapp.yaml delete mode 100644 ansible/templates/prometheus-node-exporter.service delete mode 100644 ansible/templates/prometheus.service.j2 delete mode 100644 ansible/templates/prometheus.yml.j2 delete mode 100644 ansible/templates/testappd.service.j2 create mode 100644 ansible/vars.yaml create mode 100755 script/runload.sh create mode 100755 script/secretsgen.sh rename tf/{nodes.tf => droplets.tf} (52%) create mode 100644 tf/hosts.tftpl create mode 100644 tf/outputs.tf create mode 100644 tf/variables.tf diff --git a/.gitignore b/.gitignore index aed3842..e076dcb 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,6 @@ /tf/.terraform.lock.hcl /ansible/testnet/* /ansible/hosts +# These files are auto-generated during deployment +/ansible/secrets.yaml +/ansible/roles/telegraf/vars/secret.yaml diff --git a/Makefile b/Makefile index 102f338..48fee50 100644 --- a/Makefile +++ b/Makefile @@ -1,50 +1,34 @@ DO_INSTANCE_TAGNAME=v035-testnet LOAD_RUNNER_COMMIT_HASH ?= 51685158fe36869ab600527b852437ca0939d0cc LOAD_RUNNER_CMD=go run github.com/tendermint/tendermint/test/e2e/runner@$(LOAD_RUNNER_COMMIT_HASH) +E2E_RUNNER_VERSION=v0.35.5 export 
DO_INSTANCE_TAGNAME +export LOAD_RUNNER_CMD +export E2E_RUNNER_VERSION -.PHONY: terraform-init -terraform-init: +.PHONY: init +init: $(MAKE) -C ./tf/ init -.PHONY: terraform-apply -terraform-apply: +.PHONY: deploy +deploy: $(MAKE) -C ./tf/ apply + ./script/configgen.sh ./ansible/hosts + ./script/secretsgen.sh ./ansible/secrets.yaml + ANSIBLE_HOST_KEY_CHECKING=False \ + ansible-playbook -i ./ansible/hosts -u root ./ansible/deploy.yaml -f 10 -.PHONY: hosts -hosts: - echo "[validators]" > ./ansible/hosts - doctl compute droplet list --tag-name $(DO_INSTANCE_TAGNAME) --tag-name "testnet-node" | tail -n+2 | tr -s ' ' | cut -d' ' -f2,3 | sort -k1 | sed 's/\(.*\) \(.*\)/\2 name=\1/g' >> ./ansible/hosts - echo "[prometheus]" >> ./ansible/hosts - doctl compute droplet list --tag-name $(DO_INSTANCE_TAGNAME) --tag-name "testnet-observability" | tail -n+2 | tr -s ' ' | cut -d' ' -f3 >> ./ansible/hosts - -.PHONY: configgen -configgen: - ./script/configgen.sh `tail -n+2 ./ansible/hosts | head -n -2 |cut -d' ' -f1| paste -s -d, -` - -.PHONY: ansible-install -ansible-install: - cd ansible && \ - ansible-playbook -i hosts -u root base.yaml -f 10 && \ - ansible-playbook -i hosts -u root prometheus-node-exporter.yaml -f 10 && \ - ansible-playbook -i hosts -u root init-testapp.yaml -f 10 && \ - ansible-playbook -i hosts -u root update-testapp.yaml -f 10 - -.PHONY: prometheus-init -prometheus-init: - cd ansible && ansible-playbook -i hosts -u root prometheus.yaml -f 10 - -.PHONY: start-network -start-network: - cd ansible && ansible-playbook -i hosts -u root start-testapp.yaml -f 10 +.PHONY: update-testapp +update-testapp: + ./script/configgen.sh ./ansible/hosts + ANSIBLE_HOST_KEY_CHECKING=False \ + ansible-playbook -i ./ansible/hosts -u root ./ansible/update-testapp.yaml .PHONY: runload runload: - $(LOAD_RUNNER_CMD) load \ - --ip-list `tail -n+2 ./ansible/hosts | head -n -2 |cut -d' ' -f1| paste -s -d, -` \ - --seed-delta $(shell echo $$RANDOM) + ./script/runload.sh ./ansible/hosts 
-.PHONY: terraform-destroy -terraform-destroy: +.PHONY: destroy +destroy: $(MAKE) -C ./tf/ destroy diff --git a/README.md b/README.md index e454e59..38f8b88 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ test networks on Digital Ocean (DO). - [Ansible CLI][Ansible] - Go -## Instructions +## Deployment After you have all the prerequisites installed and have configured your [`testnet.toml`](./testnet.toml) file appropriately: @@ -32,30 +32,49 @@ ssh_keys = ["ab:cd:ef:01:23:45:67:89:ab:cd:ef:01:23:45:67:89"] EOF # 4. Initialize Terraform (only needed once) -make terraform-init +make init -# 5. Create the VMs for the validators and Prometheus as specified in ./testnet.toml -# Be sure to use your actual DO token and SSH key fingerprints for the DO_TOKEN -# and DO_SSH_KEYS variables. -make terraform-apply +# 5. Create the VMs for the validators and monitoring server as specified in +# ./testnet.toml +make deploy -# 6. Discover the IP addresses of the hosts for Ansible -make hosts +# 6. Execute a load test against the network +make runload +``` -# 7. Generate the testnet configuration -make configgen +## Data visualization -# 8. Install all necessary software on the created VMs using Ansible -make ansible-install +Once you have deployed a testnet, there will be a "monitor" server available +running an [InfluxDB] instance. Check the generated `ansible/hosts` file for the +IP address of the monitor and navigate to `http://<monitor-ip>:8086` in your web +browser to access the InfluxDB interface. -# 9. Initialize the Prometheus instance -make prometheus-init +The username is `admin` and the password is automatically generated during +deployment. The password can be found in the `ansible/secrets.yaml` file (not +committed to the repository). -# 10. Start the test application on all of the validators -make start-network +The UI is relatively straightforward, but if you need additional help please +see the [InfluxDB docs][InfluxDB]. -# 11. 
Execute a load test against the network -make runload +## Reloading the test app + +In cases where you don't want to tear down the infrastructure and only want to +reload the test app running across the network (say there are new changes on the +`v0.35.x` branch in the Git repo): + +```bash +make update-testapp +``` + +This will stop the test app, remove all config and data, redeploy the config, +and restart the test app. + +## Teardown + +To destroy all Digital Ocean infrastructure: + +```bash +make destroy ``` ## Metrics @@ -68,3 +87,4 @@ metrics and view their associated graphs. [Ansible]: https://docs.ansible.com/ansible/latest/index.html [Terraform]: https://www.terraform.io/docs [doctl]: https://docs.digitalocean.com/reference/doctl/how-to/install/ +[InfluxDB]: https://docs.influxdata.com/influxdb/v2.2/ diff --git a/ansible/Makefile b/ansible/Makefile new file mode 100644 index 0000000..fb80627 --- /dev/null +++ b/ansible/Makefile @@ -0,0 +1,5 @@ +.PHONY: deploy +deploy: + ANSIBLE_HOST_KEY_CHECKING=False \ + ansible-playbook -i hosts -u root \ + deploy.yaml -f 10 diff --git a/ansible/base.yaml b/ansible/base.yaml deleted file mode 100644 index 34aea94..0000000 --- a/ansible/base.yaml +++ /dev/null @@ -1,22 +0,0 @@ -- name: base - hosts: validators,prometheus - gather_facts: yes - become_method: sudo - vars: - ansible_host_key_checking: false - - tasks: - - name: Update apt cache - ansible.builtin.apt: - update_cache: yes - cache_valid_time: 60 - - name: install deps - ansible.builtin.apt: - name: - - git - - gcc - - golang-1.17-go - - prometheus - - prometheus-node-exporter - state: latest - become: yes diff --git a/ansible/config-deploy.yaml b/ansible/config-deploy.yaml deleted file mode 100644 index 519e718..0000000 --- a/ansible/config-deploy.yaml +++ /dev/null @@ -1,19 +0,0 @@ -- name: initialize app - hosts: validators - become: false - gather_facts: yes - hosts: 
validators - vars: - tm_home: /root/.testapp/ - ansible_host_key_checking: false - - tasks: - - name: copy configuration files - ansible.builtin.copy: - src: ./testnet/{{ hostvars[inventory_hostname].name }}/config/config.toml - dest: "{{ tm_home }}/config/config.toml" - - name: restart app - ansible.builtin.systemd: - name: testappd - state: restarted - become: yes diff --git a/ansible/deploy.yaml b/ansible/deploy.yaml new file mode 100644 index 0000000..61c0922 --- /dev/null +++ b/ansible/deploy.yaml @@ -0,0 +1,25 @@ +--- +# This playbook must be executed as root. +# +# It's also critical that the monitor is deployed first before the nodes +# because the monitor deployment generates an API token for Telegraf instances +# on the nodes to access the InfluxDB database on the monitor. +- hosts: monitor + become: no + vars_files: + - ./vars.yaml + - ./secrets.yaml + roles: + - common + - influxdb + +- hosts: nodes + become: no + vars_files: + - ./vars.yaml + - ./secrets.yaml + roles: + - common + - telegraf + - tendermint + - testapp diff --git a/ansible/init-testapp.yaml b/ansible/init-testapp.yaml deleted file mode 100644 index 30c0384..0000000 --- a/ansible/init-testapp.yaml +++ /dev/null @@ -1,14 +0,0 @@ -- name: initialize app - hosts: validators - become: false - gather_facts: yes - hosts: validators - vars: - tm_home: /root/.testapp/ - ansible_host_key_checking: false - - tasks: - - name: copy configuration files - ansible.builtin.copy: - src: ./testnet/{{ hostvars[inventory_hostname].name }}/ - dest: "{{ tm_home }}/" diff --git a/ansible/prometheus-node-exporter.yaml b/ansible/prometheus-node-exporter.yaml deleted file mode 100644 index 18845a0..0000000 --- a/ansible/prometheus-node-exporter.yaml +++ /dev/null @@ -1,19 +0,0 @@ -- name: prometheus node exporter - hosts: validators,prometheus - gather_facts: yes - become_method: sudo - vars: - ansible_host_key_checking: false - - tasks: - - name: add node-exporter systemd unit file - ansible.builtin.copy: - src: 
templates/prometheus-node-exporter.service - dest: /etc/prometheus/prometheus-node-exporter.service - become: yes - - name: start the systemd unit - ansible.builtin.systemd: - name: prometheus-node-exporter - state: started - daemon_reload: yes - enabled: yes diff --git a/ansible/prometheus.yaml b/ansible/prometheus.yaml deleted file mode 100644 index 8fcd488..0000000 --- a/ansible/prometheus.yaml +++ /dev/null @@ -1,29 +0,0 @@ -- name: create prometheus - become: false - gather_facts: yes - hosts: prometheus - vars: - ansible_host_key_checking: false - tasks: - - name: create unit file - template: - src: templates/prometheus.service.j2 - dest: /lib/systemd/system/prometheus.service - become: yes - - name: create config - template: - src: templates/prometheus.yml.j2 - dest: /etc/prometheus/prometheus.yml - become: yes - - name: start the systemd-unit - ansible.builtin.systemd: - name: prometheus - state: started - daemon_reload: yes - enabled: yes - - name: restart the systemd-unit # not sure why this is necessary - ansible.builtin.systemd: - name: prometheus - state: restarted - daemon_reload: yes - enabled: yes diff --git a/ansible/remove-testapp-data.yaml b/ansible/remove-testapp-data.yaml deleted file mode 100644 index b5ef941..0000000 --- a/ansible/remove-testapp-data.yaml +++ /dev/null @@ -1,23 +0,0 @@ -- name: remove testapp data - hosts: validators - become_method: sudo - gather_facts: yes - vars: - tm_home: /root/.testapp/ - - tasks: - - name: stop app - ansible.builtin.systemd: - name: testappd - state: stopped - become: yes - - name: delete tm data - ansible.builtin.file: - path: "{{ tm_home }}" - state: absent - become: yes - - name: delete app data - ansible.builtin.file: - path: "{{ ansible_user_dir }}/data" - state: absent - become: yes diff --git a/ansible/restart-testapp.yaml b/ansible/restart-testapp.yaml deleted file mode 100644 index 1dc8c44..0000000 --- a/ansible/restart-testapp.yaml +++ /dev/null @@ -1,11 +0,0 @@ -- name: update testapp - 
hosts: validators - become_method: sudo - gather_facts: yes - - tasks: - - name: restart app - ansible.builtin.systemd: - name: testappd - state: restarted - become: yes diff --git a/ansible/roles/common/files/iptables-rules.v4 b/ansible/roles/common/files/iptables-rules.v4 new file mode 100644 index 0000000..4d55cb0 --- /dev/null +++ b/ansible/roles/common/files/iptables-rules.v4 @@ -0,0 +1,17 @@ +# Allow SSH on port 22 and related traffic. Rate-limit SSH login attempts. +# Log and drop failed SSH logins. +*filter +:INPUT ACCEPT [0:0] +:FORWARD ACCEPT [0:0] +:OUTPUT ACCEPT [368:94560] +:LOGDROP - [0:0] +-A INPUT -i lo -j ACCEPT +-A INPUT -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT +-A INPUT -p tcp -m tcp --dport 22 -m conntrack --ctstate NEW -m recent --update --seconds 60 --hitcount 11 --name DEFAULT --mask 255.255.255.255 --rsource -j LOGDROP +-A INPUT -p tcp -m tcp --dport 22 -m conntrack --ctstate NEW -m recent --set --name DEFAULT --mask 255.255.255.255 --rsource +-A INPUT -p tcp -m tcp --dport 22 -j ACCEPT +-A INPUT -m limit --limit 5/min -j LOG --log-prefix "iptables denied: " --log-level 7 +-A INPUT -j DROP +-A LOGDROP -j LOG --log-prefix "iptables denied ssh: " --log-level 7 +-A LOGDROP -j DROP +COMMIT diff --git a/ansible/roles/common/files/iptables-rules.v6 b/ansible/roles/common/files/iptables-rules.v6 new file mode 100644 index 0000000..cebb06c --- /dev/null +++ b/ansible/roles/common/files/iptables-rules.v6 @@ -0,0 +1,17 @@ +# Allow SSH on port 22 and related traffic. Rate-limit SSH login attempts. +# Log and drop failed SSH logins. 
+*filter +:INPUT ACCEPT [0:0] +:FORWARD ACCEPT [0:0] +:OUTPUT ACCEPT [0:0] +:LOGDROP - [0:0] +-A INPUT -i lo -j ACCEPT +-A INPUT -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT +-A INPUT -p tcp -m tcp --dport 22 -m conntrack --ctstate NEW -m recent --update --seconds 60 --hitcount 11 --name DEFAULT --mask ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff --rsource -j LOGDROP +-A INPUT -p tcp -m tcp --dport 22 -m conntrack --ctstate NEW -m recent --set --name DEFAULT --mask ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff --rsource +-A INPUT -p tcp -m tcp --dport 22 -j ACCEPT +-A INPUT -m limit --limit 5/min -j LOG --log-prefix "ip6tables denied: " --log-level 7 +-A INPUT -j DROP +-A LOGDROP -j LOG --log-prefix "ip6tables denied ssh: " --log-level 7 +-A LOGDROP -j DROP +COMMIT diff --git a/ansible/roles/common/tasks/main.yaml b/ansible/roles/common/tasks/main.yaml new file mode 100644 index 0000000..4663c34 --- /dev/null +++ b/ansible/roles/common/tasks/main.yaml @@ -0,0 +1,27 @@ +- name: install common dependencies + ansible.builtin.apt: + name: + - iptables + - iptables-persistent + state: latest + update_cache: yes + cache_valid_time: 60 + +- name: ensure persistent iptables dir exists + ansible.builtin.file: + path: /etc/iptables + state: directory + +- name: copy base iptables rules + ansible.builtin.copy: + src: "iptables-{{ item }}" + dest: "/etc/iptables/{{ item }}" + loop: + - rules.v4 + - rules.v6 + +- name: apply base ipv4 iptables rules + ansible.builtin.shell: "iptables-restore /etc/iptables/rules.v4" + +- name: apply base ipv6 iptables rules + ansible.builtin.shell: "ip6tables-restore /etc/iptables/rules.v6" diff --git a/ansible/roles/influxdb/files/config.toml b/ansible/roles/influxdb/files/config.toml new file mode 100644 index 0000000..2fdcca6 --- /dev/null +++ b/ansible/roles/influxdb/files/config.toml @@ -0,0 +1,4 @@ +bolt-path = "/var/lib/influxdb/influxd.bolt" +engine-path = "/var/lib/influxdb/engine" +reporting-disabled = true +http-bind-address = ":8086" diff 
--git a/ansible/roles/influxdb/tasks/main.yaml b/ansible/roles/influxdb/tasks/main.yaml new file mode 100644 index 0000000..51065d9 --- /dev/null +++ b/ansible/roles/influxdb/tasks/main.yaml @@ -0,0 +1,89 @@ +- name: ensure jq and curl are present + ansible.builtin.apt: + name: + - jq + - curl + state: latest + update_cache: yes + cache_valid_time: 60 + +- name: enable WAN access to InfluxDB + ansible.builtin.iptables: + chain: INPUT + protocol: tcp + destination_port: "{{ influxdb.port }}" + jump: ACCEPT + action: insert + rule_num: 6 + +- name: fetch InfluxDB .deb package + ansible.builtin.get_url: + url: "https://dl.influxdata.com/influxdb/releases/influxdb2-{{ influxdb.version }}-amd64.deb" + checksum: "sha256:{{ influxdb.checksum }}" + dest: /tmp/influxdb.deb + +- name: fetch InfluxDB cli .deb package + ansible.builtin.get_url: + url: "https://dl.influxdata.com/influxdb/releases/influxdb2-client-{{ influxdb_cli.version }}-amd64.deb" + checksum: "sha256:{{ influxdb_cli.checksum }}" + dest: /tmp/influxdb-cli.deb + +- name: install InfluxDB + ansible.builtin.apt: + deb: /tmp/influxdb.deb + +- name: copy InfluxDB config + ansible.builtin.copy: + src: config.toml + dest: /etc/influxdb/config.toml + owner: root + group: root + +- name: install InfluxDB CLI + ansible.builtin.apt: + deb: /tmp/influxdb-cli.deb + +- name: enable and start InfluxDB + ansible.builtin.systemd: + name: influxdb + daemon_reload: true + enabled: true + state: restarted + +- name: perform InfluxDB initial setup + ansible.builtin.shell: | + influx setup \ + -u admin \ + -p "{{ generated_secrets.influxdb_admin_password }}" \ + -o tendermint \ + -b tendermint \ + -r 0 \ + -f + args: + creates: /root/.influxdbv2/configs + +# TODO(thane): Restrict telegraf user access when we know which specific +# permissions it needs. 
+- name: create Telegraf user and login for InfluxDB + ansible.builtin.shell: | + influx user create \ + -n telegraf \ + -p "{{ generated_secrets.influxdb_telegraf_password }}" \ + -o tendermint + influx auth create \ + -u telegraf \ + -o tendermint \ + --all-access + export TELEGRAF_TOKEN=$(influx auth ls --json | jq -r '.[] | select(.userName == "telegraf") | .token') + echo "influxdb_token: ${TELEGRAF_TOKEN}" > /root/telegraf-secret.yaml + args: + creates: /root/telegraf-secret.yaml + +- name: fetch InfluxDB telegraf token to distribute to nodes + ansible.builtin.fetch: + src: /root/telegraf-secret.yaml + dest: "{{ playbook_dir }}/roles/telegraf/vars/secret.yaml" + flat: true + +# TODO(thane): Enable TLS support for longer-running testnets +# See https://docs.influxdata.com/influxdb/v2.2/security/enable-tls/ diff --git a/ansible/roles/telegraf/tasks/main.yaml b/ansible/roles/telegraf/tasks/main.yaml new file mode 100644 index 0000000..188a07e --- /dev/null +++ b/ansible/roles/telegraf/tasks/main.yaml @@ -0,0 +1,26 @@ +- name: load Telegraf token + include_vars: + file: ../vars/secret.yaml + name: telegraf_secret + +- name: fetch Telegraf .deb package + ansible.builtin.get_url: + url: "https://dl.influxdata.com/telegraf/releases/telegraf_{{ telegraf.version }}-1_amd64.deb" + checksum: "sha256:{{ telegraf.checksum }}" + dest: /tmp/telegraf.deb + +- name: install Telegraf + ansible.builtin.apt: + deb: /tmp/telegraf.deb + +- name: deploy Telegraf config + ansible.builtin.template: + src: telegraf.conf.j2 + dest: /etc/telegraf/telegraf.conf + +- name: enable and start Telegraf + ansible.builtin.service: + name: telegraf + enabled: true + state: restarted + diff --git a/ansible/roles/telegraf/templates/telegraf.conf.j2 b/ansible/roles/telegraf/templates/telegraf.conf.j2 new file mode 100644 index 0000000..69ff805 --- /dev/null +++ b/ansible/roles/telegraf/templates/telegraf.conf.j2 @@ -0,0 +1,218 @@ +# Telegraf Configuration +# +# Telegraf is entirely plugin driven. 
All metrics are gathered from the +# declared inputs, and sent to the declared outputs. +# +# Plugins must be declared in here to be active. +# To deactivate a plugin, comment out the name and any variables. +# +# Use 'telegraf -config telegraf.conf -test' to see what metrics a config +# file would generate. +# +# Environment variables can be used anywhere in this config file, simply surround +# them with ${}. For strings the variable must be within quotes (ie, "${STR_VAR}"), +# for numbers and booleans they should be plain (ie, ${INT_VAR}, ${BOOL_VAR}) + + +# Global tags can be specified here in key="value" format. +[global_tags] + # dc = "us-east-1" # will tag all metrics with dc=us-east-1 + # rack = "1a" + ## Environment variables can be used as tags, and throughout the config file + # user = "$USER" + + +# Configuration for telegraf agent +[agent] + ## Default data collection interval for all inputs + interval = "10s" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + ## Telegraf will send metrics to outputs in batches of at most + ## metric_batch_size metrics. + ## This controls the size of writes that Telegraf sends to output plugins. + metric_batch_size = 1000 + + ## Maximum number of unwritten metrics per output. Increasing this value + ## allows for longer periods of output downtime without dropping metrics at the + ## cost of higher maximum memory usage. + metric_buffer_limit = 10000 + + ## Collection jitter is used to jitter the collection by a random amount. + ## Each plugin will sleep for a random time within jitter before collecting. + ## This can be used to avoid many plugins querying things like sysfs at the + ## same time, which can have a measurable effect on the system. + collection_jitter = "0s" + + ## Collection offset is used to shift the collection by the given amount. 
+ ## This can be used to avoid many plugins querying constrained devices + ## at the same time by manually scheduling them in time. + # collection_offset = "0s" + + ## Default flushing interval for all outputs. Maximum flush_interval will be + ## flush_interval + flush_jitter + flush_interval = "10s" + ## Jitter the flush interval by a random amount. This is primarily to avoid + ## large write spikes for users running a large number of telegraf instances. + ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "0s" + + ## Collected metrics are rounded to the precision specified. Precision is + ## specified as an interval with an integer + unit (e.g. 0s, 10ms, 2us, 4s). + ## Valid time units are "ns", "us" (or "µs"), "ms", "s". + ## + ## By default or when set to "0s", precision will be set to the same + ## timestamp order as the collection interval, with the maximum being 1s: + ## ie, when interval = "10s", precision will be "1s" + ## when interval = "250ms", precision will be "1ms" + ## + ## Precision will NOT be used for service inputs. It is up to each individual + ## service input to set the timestamp at the appropriate precision. + precision = "0s" + + ## Override default hostname, if empty use os.Hostname() + hostname = "" + ## If set to true, do not set the "host" tag in the telegraf agent. + omit_hostname = false + +############################################################################### +# OUTPUT PLUGINS # +############################################################################### + +# Configuration for sending metrics to InfluxDB 2.0 +[[outputs.influxdb_v2]] + ## The URLs of the InfluxDB cluster nodes. + ## + ## Multiple URLs can be specified for a single cluster, only ONE of the + ## urls will be written to each interval. + ## ex: urls = ["https://us-west-2-1.aws.cloud2.influxdata.com"] + urls = ["http://{{ hostvars['monitor']['ansible_host'] }}:8086"] + + ## Token for authentication. 
+ token = "{{ telegraf_secret.influxdb_token }}" + + ## Organization is the name of the organization you wish to write to. + organization = "tendermint" + + ## Destination bucket to write into. + bucket = "tendermint" + + +############################################################################### +# INPUT PLUGINS # +############################################################################### + + +# Read metrics about cpu usage +[[inputs.cpu]] + ## Whether to report per-cpu stats or not + percpu = true + ## Whether to report total system cpu stats or not + totalcpu = true + ## If true, collect raw CPU time metrics + collect_cpu_time = false + ## If true, compute and report the sum of all non-idle CPU states + report_active = false + ## If true and the info is available then add core_id and physical_id tags + core_tags = false + + +# Read metrics about disk usage by mount point +[[inputs.disk]] + ## By default stats will be gathered for all mount points. + ## Set mount_points will restrict the stats to only the specified mount points. + # mount_points = ["/"] + + ## Ignore mount points by filesystem type. + ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"] + + ## Ignore mount points by mount options. + ## The 'mount' command reports options of all mounts in parentheses. + ## Bind mounts can be ignored with the special 'bind' option. + # ignore_mount_opts = [] + + +# Read metrics about disk IO by device +[[inputs.diskio]] + ## By default, telegraf will gather stats for all devices including + ## disk partitions. + ## Setting devices will restrict the stats to the specified devices. + # devices = ["sda", "sdb", "vd*"] + ## Uncomment the following line if you need disk serial numbers. + # skip_serial_number = false + # + ## On systems which support it, device metadata can be added in the form of + ## tags. + ## Currently only Linux is supported via udev properties. 
You can view + ## available properties for a device by running: + ## 'udevadm info -q property -n /dev/sda' + ## Note: Most, but not all, udev properties can be accessed this way. Properties + ## that are currently inaccessible include DEVTYPE, DEVNAME, and DEVPATH. + # device_tags = ["ID_FS_TYPE", "ID_FS_USAGE"] + # + ## Using the same metadata source as device_tags, you can also customize the + ## name of the device via templates. + ## The 'name_templates' parameter is a list of templates to try and apply to + ## the device. The template may contain variables in the form of '$PROPERTY' or + ## '${PROPERTY}'. The first template which does not contain any variables not + ## present for the device is used as the device name tag. + ## The typical use case is for LVM volumes, to get the VG/LV name instead of + ## the near-meaningless DM-0 name. + # name_templates = ["$ID_FS_LABEL","$DM_VG_NAME/$DM_LV_NAME"] + + +# Get kernel statistics from /proc/stat +[[inputs.kernel]] + # no configuration + + +# Read metrics about memory usage +[[inputs.mem]] + # no configuration + + +# Get the number of processes and group them by status +[[inputs.processes]] + # no configuration + + +# Read metrics about swap memory usage +[[inputs.swap]] + # no configuration + + +# Read metrics about system load & uptime +[[inputs.system]] + # no configuration + + +# Read metrics from one or many prometheus clients +[[inputs.prometheus]] + urls = ["http://localhost:{{ tendermint.prometheus_port }}/metrics"] + +# Parse the new lines appended to a file +[[inputs.tail]] + ## File names or a pattern to tail. + ## These accept standard unix glob matching rules, but with the addition of + ## ** as a "super asterisk". 
ie: + ## "/var/log/**.log" -> recursively find all .log files in /var/log + ## "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log + ## "/var/log/apache.log" -> just tail the apache log file + ## "/var/log/log[!1-2]*" -> tail files without 1-2 + ## "/var/log/log[^1-2]*" -> identical behavior as above + ## See https://github.com/gobwas/glob for more examples + ## + files = ["{{ tendermint.log_file }}"] + + ## Read file from beginning. + from_beginning = false + + ## Data format to consume. + ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "json" + json_strict = false + diff --git a/ansible/roles/telegraf/vars/.gitkeep b/ansible/roles/telegraf/vars/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/ansible/roles/tendermint/tasks/main.yaml b/ansible/roles/tendermint/tasks/main.yaml new file mode 100644 index 0000000..2096cd0 --- /dev/null +++ b/ansible/roles/tendermint/tasks/main.yaml @@ -0,0 +1,41 @@ +- name: install Tendermint prerequisites + ansible.builtin.apt: + name: + - git + - gcc + - golang-1.18-go + state: latest + update_cache: yes + cache_valid_time: 60 + +- name: create tendermint group + ansible.builtin.group: + name: tendermint + state: present + +- name: create tendermint user + ansible.builtin.user: + name: tendermint + group: tendermint + home: "{{ tendermint.home_dir }}" + create_home: true + shell: /usr/bin/bash + state: present + +- name: ensure tendermint config dir exists + ansible.builtin.file: + path: "{{ tendermint.home_dir }}/config" + state: directory + owner: tendermint + group: tendermint + +- name: allow TCP port access to P2P and RPC + ansible.builtin.iptables: + chain: INPUT + protocol: tcp + destination_ports: + - "26656" + - "26657" + action: insert + rule_num: 6 + jump: ACCEPT diff --git a/ansible/roles/testapp/tasks/main.yaml 
b/ansible/roles/testapp/tasks/main.yaml new file mode 100644 index 0000000..875e168 --- /dev/null +++ b/ansible/roles/testapp/tasks/main.yaml @@ -0,0 +1,67 @@ +- name: deploy testapp execution scripts + ansible.builtin.template: + src: "{{ item }}.j2" + dest: "{{ tendermint.home_dir }}/{{ item }}" + owner: tendermint + group: tendermint + mode: 0775 + loop: + - run-testapp.sh + - stop-testapp.sh + +- name: clone Tendermint git repo + become_user: tendermint + ansible.builtin.git: + repo: https://github.com/tendermint/tendermint + dest: "{{ tendermint.home_dir }}/src" + version: "{{ tendermint.version }}" + clone: yes + update: yes + +- name: build testapp + become_user: tendermint + ansible.builtin.shell: + cmd: /usr/lib/go-1.18/bin/go build -o /usr/local/bin/tendermint-testapp + chdir: "{{ tendermint.home_dir }}/src/test/e2e/node" + +- name: touch Tendermint log file + ansible.builtin.file: + path: "{{ tendermint.log_file }}" + state: touch + owner: tendermint + group: tendermint + +- name: stop testapp + become_user: tendermint + ansible.builtin.shell: + cmd: ./stop-testapp.sh + chdir: "{{ tendermint.home_dir }}" + +- name: wipe testapp config and data + ansible.builtin.file: + path: "{{ item }}" + state: absent + loop: + - "{{ tendermint.home_dir }}/config" + - "{{ tendermint.home_dir }}/data" + +- name: copy testapp config + ansible.builtin.copy: + src: "{{ playbook_dir }}/testnet/{{ inventory_hostname }}/" + dest: "{{ tendermint.home_dir }}/" + owner: tendermint + group: tendermint + +# We run the testapp as a background process instead of as a service +# specifically so we can control how we kill the process in testnet +# perturbations. +# +# This also enables easier log collection with Telegraf, as it can just tail +# the JSON entries in the log file and ship them off to the monitor. 
+- name: start testapp + become_user: tendermint + ansible.builtin.shell: | + nohup ./run-testapp.sh </dev/null >/dev/null + args: + chdir: "{{ tendermint.home_dir }}" + diff --git a/ansible/roles/testapp/templates/run-testapp.sh.j2 b/ansible/roles/testapp/templates/run-testapp.sh.j2 new file mode 100644 index 0000000..9c45804 --- /dev/null +++ b/ansible/roles/testapp/templates/run-testapp.sh.j2 @@ -0,0 +1,7 @@ +#!/bin/bash +set -euo pipefail + +export TMHOME="{{ tendermint.home_dir }}" +tendermint-testapp {{ tendermint.home_dir }}/config/app.toml > {{ tendermint.log_file }} 2>&1 & +echo "$!" > {{ tendermint.pid_file }} + diff --git a/ansible/roles/testapp/templates/stop-testapp.sh.j2 b/ansible/roles/testapp/templates/stop-testapp.sh.j2 new file mode 100644 index 0000000..94253eb --- /dev/null +++ b/ansible/roles/testapp/templates/stop-testapp.sh.j2 @@ -0,0 +1,7 @@ +#!/bin/bash +set -euo pipefail + +if [ -f "{{ tendermint.pid_file }}" ]; then + kill `cat {{ tendermint.pid_file }}` || true + rm -rf {{ tendermint.pid_file }} +fi diff --git a/ansible/start-testapp.yaml b/ansible/start-testapp.yaml deleted file mode 100644 index 637b932..0000000 --- a/ansible/start-testapp.yaml +++ /dev/null @@ -1,10 +0,0 @@ -- name: start testapp - hosts: validators - gather_facts: yes - - tasks: - - name: start the systemd-unit - ansible.builtin.systemd: - name: testappd - state: started - enabled: yes diff --git a/ansible/stop-testapp.yaml b/ansible/stop-testapp.yaml deleted file mode 100644 index 33d1a69..0000000 --- a/ansible/stop-testapp.yaml +++ /dev/null @@ -1,10 +0,0 @@ -- name: start testapp - hosts: validators - gather_facts: yes - - tasks: - - name: start the systemd-unit - ansible.builtin.systemd: - name: testappd - state: stopped - enabled: yes diff --git a/ansible/templates/prometheus-node-exporter.service b/ansible/templates/prometheus-node-exporter.service deleted file mode 100644 index e8256fe..0000000 --- a/ansible/templates/prometheus-node-exporter.service +++ /dev/null @@
-1,13 +0,0 @@ -[Unit] -Description=Node Exporter -Wants=network-online.target -After=network-online.target - -[Service] -User=prometheus -Group=prometheus -Type=simple -ExecStart=/usr/bin/prometheus-node-exporter - -[Install] -WantedBy=multi-user.target diff --git a/ansible/templates/prometheus.service.j2 b/ansible/templates/prometheus.service.j2 deleted file mode 100644 index 2b2d59b..0000000 --- a/ansible/templates/prometheus.service.j2 +++ /dev/null @@ -1,17 +0,0 @@ -[Unit] -Description=Prometheus -Wants=network-online.target -After=network-online.target - -[Service] -User=prometheus -Group=prometheus -Type=simple -ExecStart=/usr/bin/prometheus \ - --config.file /etc/prometheus/prometheus.yml \ - --storage.tsdb.path /var/lib/prometheus/ \ - --web.console.templates=/etc/prometheus/consoles \ - --web.console.libraries=/etc/prometheus/console_libraries - -[Install] -WantedBy=multi-user.target diff --git a/ansible/templates/prometheus.yml.j2 b/ansible/templates/prometheus.yml.j2 deleted file mode 100644 index 1dee48d..0000000 --- a/ansible/templates/prometheus.yml.j2 +++ /dev/null @@ -1,21 +0,0 @@ -global: - scrape_interval: 10s # By default, scrape targets every 10 seconds. - evaluation_interval: 15s # Evaluate rules every 15 seconds. 
- -scrape_configs: -{% for host in groups['validators'] %} - - job_name: {{ hostvars[host].name }} - - scrape_interval: 5s - - static_configs: - - targets: ['{{ hostvars[host].inventory_hostname }}:26660'] - - - job_name: {{ hostvars[host].name }}-node-exporter - - scrape_interval: 5s - - static_configs: - - targets: ['{{ hostvars[host].inventory_hostname }}:9100'] - -{% endfor %} diff --git a/ansible/templates/testappd.service.j2 b/ansible/templates/testappd.service.j2 deleted file mode 100644 index 1c4a655..0000000 --- a/ansible/templates/testappd.service.j2 +++ /dev/null @@ -1,17 +0,0 @@ -# /etc/systemd/system/testappd.service -[Unit] -Description=Testapp Node -After=network.target - -[Service] -Type=simple -User={{ ansible_user_id }} -WorkingDirectory={{ ansible_user_dir }} -ExecStart={{ansible_user_dir }}/go/bin/node {{ tm_home }}config/app.toml -Environment=TMHOME={{ tm_home }} -Restart=on-failure -RestartSec=3 -LimitNOFILE=4096 - -[Install] -WantedBy=multi-user.target diff --git a/ansible/update-testapp.yaml b/ansible/update-testapp.yaml index 6f7ecc4..a5ca37b 100644 --- a/ansible/update-testapp.yaml +++ b/ansible/update-testapp.yaml @@ -1,25 +1,7 @@ -- name: update testapp - hosts: validators - become_method: sudo - gather_facts: yes - vars: - version_tag: v0.35.x - tm_home: /root/.testapp/ - - tasks: - - name: clone tendermint repo - ansible.builtin.git: - repo: https://github.com/tendermint/tendermint - dest: "{{ ansible_user_dir }}/tendermint" - version: "{{ version_tag }}" - - name: rebuild testapp - shell: "cd tendermint/test/e2e/node && /usr/lib/go-1.17/bin/go install" - - name: update unit file - template: - src: templates/testappd.service.j2 - dest: /lib/systemd/system/testappd.service - become: yes - - name: reload systemd daemon - ansible.builtin.systemd: - daemon_reload: yes - become: yes +--- +- hosts: nodes + become: no + vars_files: + - ./vars.yaml + roles: + - testapp diff --git a/ansible/vars.yaml b/ansible/vars.yaml new file mode 100644 
index 0000000..83d68e4 --- /dev/null +++ b/ansible/vars.yaml @@ -0,0 +1,21 @@ +influxdb: + version: "2.2.0" + checksum: dccc6cbf8af734407488d9b91c71b72f49c8cf4da2746e891be09b16f9b510d6 + port: "8086" +influxdb_cli: + version: "2.3.0" + checksum: d88f9dd7707a4d1e1ecf6d04d9102626a7c72262dcf1ea28e5e795c2c01615aa +telegraf: + version: "1.23.0" + checksum: f00854dfaab40ecdda05eeab841cab77aff8242601b80310d46a97887c998c39 +tendermint: + # The Git branch/tag/commit hash of the Tendermint repo from which the E2E + # test app will be built. + version: "v0.35.x" + # This must match what has already been configured in the Tendermint nodes' + # config files. Changing this value will only affect the port that Telegraf + # polls for Prometheus metrics. + prometheus_port: "26660" + home_dir: /tendermint + log_file: /var/log/tendermint.log + pid_file: /tendermint/testapp.pid diff --git a/script/configgen.sh b/script/configgen.sh index b77e34d..44dded3 100755 --- a/script/configgen.sh +++ b/script/configgen.sh @@ -1,14 +1,26 @@ #!/bin/bash set -euo pipefail -NEW_IPS=$1 +ANSIBLE_HOSTS=$1 +E2E_RUNNER_VERSION=${E2E_RUNNER_VERSION:-v0.35.5} +E2E_RUNNER_URL="github.com/tendermint/tendermint/test/e2e/runner@${E2E_RUNNER_VERSION}" -go run github.com/tendermint/tendermint/test/e2e/runner@v0.35.5 setup -f ./testnet.toml +# Extract the IP addresses of all of the nodes (excluding the monitoring +# server) from the Ansible hosts file. IP addresses will be in the same order +# as those generated in the docker-compose.yml file, and will be separated by +# commas.
+NEW_IPS=`cat ${ANSIBLE_HOSTS} | grep -v 'monitor' | grep 'ansible_host' | awk -F' ansible_host=' '{print $2}' | head -c -1 | tr '\n' ','` + +go run ${E2E_RUNNER_URL} setup -f ./testnet.toml OLD_IPS=`grep -E '(ipv4_address|container_name)' ./testnet/docker-compose.yml | sed 's/^.*ipv4_address: \(.*\)/\1/g' | sed 's/.*container_name: \(.*\)/\1/g' | paste -sd ' \n' - | sort -k1 | cut -d ' ' -f2` while read old <&3 && read new <&4; do + echo "Swapping ${old} for ${new}" find ./testnet/ -type f | xargs -I{} sed -i "s/$old/$new/g" {} -done 3< <(echo $OLD_IPS | tr ' ' '\n') 4< <(echo $NEW_IPS | tr , '\n' ) +done 3< <(echo $OLD_IPS | tr ' ' '\n') 4< <(echo $NEW_IPS | tr , '\n' ) + +# Update configuration parameters +find ./testnet/ -name 'config.toml' | xargs -I{} sed -i "s/^log-format = .*$/log-format = \"json\"/g" {} rm -rf ./ansible/testnet mv ./testnet ./ansible diff --git a/script/runload.sh b/script/runload.sh new file mode 100755 index 0000000..e3145ad --- /dev/null +++ b/script/runload.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -euo pipefail + +ANSIBLE_HOSTS=$1 +LOAD_RUNNER_CMD=${LOAD_RUNNER_CMD:-"go run github.com/tendermint/tendermint/test/e2e/runner@51685158fe36869ab600527b852437ca0939d0cc"} +IP_LIST=`cat ${ANSIBLE_HOSTS} | grep -v 'monitor' | grep 'ansible_host' | awk -F' ansible_host=' '{print $2}' | head -c -1 | tr '\n' ','` + +${LOAD_RUNNER_CMD} load --ip-list ${IP_LIST} --seed-delta 42 diff --git a/script/secretsgen.sh b/script/secretsgen.sh new file mode 100755 index 0000000..f3cbf91 --- /dev/null +++ b/script/secretsgen.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -euo pipefail + +OUTPUT_FILE="$1" + +if [ ! 
-f "${OUTPUT_FILE}" ]; then + cat <<EOF > ${OUTPUT_FILE} +generated_secrets: + influxdb_admin_password: "$(tr -cd '[:alnum:]' < /dev/urandom | fold -w30 | head -n1)" + influxdb_telegraf_password: "$(tr -cd '[:alnum:]' < /dev/urandom | fold -w30 | head -n1)" +EOF + echo "Generated secrets in ${OUTPUT_FILE}" +else + echo "${OUTPUT_FILE} already exists - reusing generated secrets" +fi + diff --git a/testnet.toml b/testnet.toml index 53d69c9..618a93b 100644 --- a/testnet.toml +++ b/testnet.toml @@ -7,53 +7,53 @@ persistent_peers = ["seed01"] [node.validator02] use_legacy_p2p = true persistent_peers = ["seed01"] -[node.validator03] -use_legacy_p2p = true -persistent_peers = ["seed01"] -[node.validator04] -use_legacy_p2p = true -persistent_peers = ["seed01"] -[node.validator05] -use_legacy_p2p = true -persistent_peers = ["seed01"] -[node.validator06] -use_legacy_p2p = true -persistent_peers = ["seed01"] -[node.validator07] -use_legacy_p2p = true -persistent_peers = ["seed01"] -[node.validator08] -use_legacy_p2p = true -persistent_peers = ["seed01"] -[node.validator09] -use_legacy_p2p = true -persistent_peers = ["seed01"] -[node.validator10] -use_legacy_p2p = true -persistent_peers = ["seed01"] -[node.validator11] -use_legacy_p2p = true -persistent_peers = ["seed01"] -[node.validator12] -use_legacy_p2p = true -persistent_peers = ["seed01"] -[node.validator13] -use_legacy_p2p = true -persistent_peers = ["seed01"] -[node.validator14] -use_legacy_p2p = true -persistent_peers = ["seed01"] -[node.validator15] -use_legacy_p2p = true -persistent_peers = ["seed01"] -[node.validator16] -use_legacy_p2p = true -persistent_peers = ["seed01"] -[node.validator17] -use_legacy_p2p = true -persistent_peers = ["seed01"] -[node.validator18] -use_legacy_p2p = true -persistent_peers = ["seed01"] +#[node.validator03] +#use_legacy_p2p = true +#persistent_peers = ["seed01"] +#[node.validator04] +#use_legacy_p2p = true +#persistent_peers = ["seed01"] +#[node.validator05] +#use_legacy_p2p = true
+#persistent_peers = ["seed01"] +#[node.validator06] +#use_legacy_p2p = true +#persistent_peers = ["seed01"] +#[node.validator07] +#use_legacy_p2p = true +#persistent_peers = ["seed01"] +#[node.validator08] +#use_legacy_p2p = true +#persistent_peers = ["seed01"] +#[node.validator09] +#use_legacy_p2p = true +#persistent_peers = ["seed01"] +#[node.validator10] +#use_legacy_p2p = true +#persistent_peers = ["seed01"] +#[node.validator11] +#use_legacy_p2p = true +#persistent_peers = ["seed01"] +#[node.validator12] +#use_legacy_p2p = true +#persistent_peers = ["seed01"] +#[node.validator13] +#use_legacy_p2p = true +#persistent_peers = ["seed01"] +#[node.validator14] +#use_legacy_p2p = true +#persistent_peers = ["seed01"] +#[node.validator15] +#use_legacy_p2p = true +#persistent_peers = ["seed01"] +#[node.validator16] +#use_legacy_p2p = true +#persistent_peers = ["seed01"] +#[node.validator17] +#use_legacy_p2p = true +#persistent_peers = ["seed01"] +#[node.validator18] +#use_legacy_p2p = true +#persistent_peers = ["seed01"] [node.seed01] use_legacy_p2p = true diff --git a/tf/Makefile b/tf/Makefile index 81236bd..0e34431 100644 --- a/tf/Makefile +++ b/tf/Makefile @@ -19,6 +19,7 @@ apply: terraform.tfvars -var='instance_names=[$(INSTANCE_NAMES)]' && \ terraform validate && \ terraform apply \ + -auto-approve \ -var='testnet_size=$(TESTNET_SIZE)' \ -var='instance_tags=["$(DO_INSTANCE_TAGNAME)"]' \ -var='instance_names=[$(INSTANCE_NAMES)]' @@ -26,6 +27,7 @@ apply: terraform.tfvars .PHONY: destroy destroy: terraform.tfvars terraform destroy \ + -auto-approve \ -var='testnet_size=$(TESTNET_SIZE)' \ -var='instance_tags=["$(DO_INSTANCE_TAGNAME)"]' \ -var='instance_names=[$(INSTANCE_NAMES)]' diff --git a/tf/nodes.tf b/tf/droplets.tf similarity index 52% rename from tf/nodes.tf rename to tf/droplets.tf index baba334..d85c958 100644 --- a/tf/nodes.tf +++ b/tf/droplets.tf @@ -1,22 +1,4 @@ -variable "testnet_size" { - type = number - default = 20 -} - -variable "ssh_keys" { - type = 
list(string) -} - -variable "instance_tags" { - type = list(string) - default = ["v035-testnet"] -} - -variable "instance_names" { - type = list(string) -} - -resource "digitalocean_droplet" "testnet-node" { +resource "digitalocean_droplet" "node" { count = var.testnet_size name = var.instance_names[count.index] image = "debian-11-x64" @@ -26,8 +8,8 @@ resource "digitalocean_droplet" "testnet-node" { ssh_keys = var.ssh_keys } -resource "digitalocean_droplet" "testnet-prometheus" { - name = "testnet-prometheus" +resource "digitalocean_droplet" "monitor" { + name = "monitor" image = "debian-11-x64" region = "fra1" tags = concat(var.instance_tags, ["testnet-observability"]) diff --git a/tf/hosts.tftpl b/tf/hosts.tftpl new file mode 100644 index 0000000..083d4ef --- /dev/null +++ b/tf/hosts.tftpl @@ -0,0 +1,6 @@ +${monitor.name} ansible_host=${monitor.ipv4_address} + +[nodes] +%{ for node in nodes ~} +${node.name} ansible_host=${node.ipv4_address} +%{ endfor ~} diff --git a/tf/outputs.tf b/tf/outputs.tf new file mode 100644 index 0000000..660de05 --- /dev/null +++ b/tf/outputs.tf @@ -0,0 +1,7 @@ +resource "local_file" "ansible_inventory" { + content = templatefile("hosts.tftpl", { + nodes = digitalocean_droplet.node.*, + monitor = digitalocean_droplet.monitor, + }) + filename = "../ansible/hosts" +} diff --git a/tf/project.tf b/tf/project.tf index 4dd4973..5124e3a 100644 --- a/tf/project.tf +++ b/tf/project.tf @@ -1,5 +1,5 @@ resource "digitalocean_project" "tm-testnet" { name = "tm-testnet" description = "A project to test the Tendermint codebase." 
- resources = concat([for node in digitalocean_droplet.testnet-node: node.urn], [digitalocean_droplet.testnet-prometheus.urn]) + resources = concat([for node in digitalocean_droplet.node: node.urn], [digitalocean_droplet.monitor.urn]) } diff --git a/tf/variables.tf b/tf/variables.tf new file mode 100644 index 0000000..5731ba3 --- /dev/null +++ b/tf/variables.tf @@ -0,0 +1,17 @@ +variable "testnet_size" { + type = number + default = 20 +} + +variable "ssh_keys" { + type = list(string) +} + +variable "instance_tags" { + type = list(string) + default = ["v035-testnet"] +} + +variable "instance_names" { + type = list(string) +} From 30882c89ad646f1b45992a04057e66992d576b0e Mon Sep 17 00:00:00 2001 From: Thane Thomson Date: Thu, 16 Jun 2022 20:08:37 -0400 Subject: [PATCH 2/5] Ignore the correct secret file Signed-off-by: Thane Thomson --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index e076dcb..1a29009 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ /ansible/hosts # These files are auto-generated during deployment /ansible/secrets.yaml -/ansible/roles/telegraf/vars/secrets.yaml +/ansible/roles/telegraf/vars/secret.yaml From 9fda6071d6e99d9d71d3baede4c6676ba77e17aa Mon Sep 17 00:00:00 2001 From: Thane Thomson Date: Thu, 16 Jun 2022 20:09:01 -0400 Subject: [PATCH 3/5] Remove unnecessary config folder creation Signed-off-by: Thane Thomson --- ansible/roles/tendermint/tasks/main.yaml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/ansible/roles/tendermint/tasks/main.yaml b/ansible/roles/tendermint/tasks/main.yaml index 2096cd0..39b4c1e 100644 --- a/ansible/roles/tendermint/tasks/main.yaml +++ b/ansible/roles/tendermint/tasks/main.yaml @@ -22,13 +22,6 @@ shell: /usr/bin/bash state: present -- name: ensure tendermint config dir exists - ansible.builtin.file: - path: "{{ tendermint.home_dir }}/config" - state: directory - owner: tendermint - group: tendermint - - name: allow TCP port access to SSH, P2P 
and RPC ansible.builtin.iptables: chain: INPUT From 480710c6a12e7e31b22ac7d7b71a9b9dc47295d7 Mon Sep 17 00:00:00 2001 From: Thane Thomson Date: Thu, 16 Jun 2022 20:13:50 -0400 Subject: [PATCH 4/5] Remove unused Ansible Makefile Signed-off-by: Thane Thomson --- ansible/Makefile | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 ansible/Makefile diff --git a/ansible/Makefile b/ansible/Makefile deleted file mode 100644 index fb80627..0000000 --- a/ansible/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -.PHONY: deploy -deploy: - ansible-playbook -i hosts -u root base.yaml -f 10 && \ - ansible-playbook -i hosts -u root init-testapp.yaml -f 10 && \ - ansible-playbook -i hosts -u root update-testapp.yaml -f 10 From dc84bf46122a365b8f75c5cdd0a2632ac1b93ff1 Mon Sep 17 00:00:00 2001 From: Thane Thomson Date: Fri, 17 Jun 2022 07:08:28 -0400 Subject: [PATCH 5/5] Restore testnet.toml file to its original state Signed-off-by: Thane Thomson --- testnet.toml | 96 ++++++++++++++++++++++++++-------------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/testnet.toml b/testnet.toml index 618a93b..53d69c9 100644 --- a/testnet.toml +++ b/testnet.toml @@ -7,53 +7,53 @@ persistent_peers = ["seed01"] [node.validator02] use_legacy_p2p = true persistent_peers = ["seed01"] -#[node.validator03] -#use_legacy_p2p = true -#persistent_peers = ["seed01"] -#[node.validator04] -#use_legacy_p2p = true -#persistent_peers = ["seed01"] -#[node.validator05] -#use_legacy_p2p = true -#persistent_peers = ["seed01"] -#[node.validator06] -#use_legacy_p2p = true -#persistent_peers = ["seed01"] -#[node.validator07] -#use_legacy_p2p = true -#persistent_peers = ["seed01"] -#[node.validator08] -#use_legacy_p2p = true -#persistent_peers = ["seed01"] -#[node.validator09] -#use_legacy_p2p = true -#persistent_peers = ["seed01"] -#[node.validator10] -#use_legacy_p2p = true -#persistent_peers = ["seed01"] -#[node.validator11] -#use_legacy_p2p = true -#persistent_peers = ["seed01"] 
-#[node.validator12] -#use_legacy_p2p = true -#persistent_peers = ["seed01"] -#[node.validator13] -#use_legacy_p2p = true -#persistent_peers = ["seed01"] -#[node.validator14] -#use_legacy_p2p = true -#persistent_peers = ["seed01"] -#[node.validator15] -#use_legacy_p2p = true -#persistent_peers = ["seed01"] -#[node.validator16] -#use_legacy_p2p = true -#persistent_peers = ["seed01"] -#[node.validator17] -#use_legacy_p2p = true -#persistent_peers = ["seed01"] -#[node.validator18] -#use_legacy_p2p = true -#persistent_peers = ["seed01"] +[node.validator03] +use_legacy_p2p = true +persistent_peers = ["seed01"] +[node.validator04] +use_legacy_p2p = true +persistent_peers = ["seed01"] +[node.validator05] +use_legacy_p2p = true +persistent_peers = ["seed01"] +[node.validator06] +use_legacy_p2p = true +persistent_peers = ["seed01"] +[node.validator07] +use_legacy_p2p = true +persistent_peers = ["seed01"] +[node.validator08] +use_legacy_p2p = true +persistent_peers = ["seed01"] +[node.validator09] +use_legacy_p2p = true +persistent_peers = ["seed01"] +[node.validator10] +use_legacy_p2p = true +persistent_peers = ["seed01"] +[node.validator11] +use_legacy_p2p = true +persistent_peers = ["seed01"] +[node.validator12] +use_legacy_p2p = true +persistent_peers = ["seed01"] +[node.validator13] +use_legacy_p2p = true +persistent_peers = ["seed01"] +[node.validator14] +use_legacy_p2p = true +persistent_peers = ["seed01"] +[node.validator15] +use_legacy_p2p = true +persistent_peers = ["seed01"] +[node.validator16] +use_legacy_p2p = true +persistent_peers = ["seed01"] +[node.validator17] +use_legacy_p2p = true +persistent_peers = ["seed01"] +[node.validator18] +use_legacy_p2p = true +persistent_peers = ["seed01"] [node.seed01] use_legacy_p2p = true