Skip to content

Commit

Permalink
atom runner update
Browse files Browse the repository at this point in the history
Signed-off-by: Barak Davidov <[email protected]>
  • Loading branch information
barakda authored and Alexander Indenbaum committed Jan 7, 2025
1 parent 2bbe566 commit f529009
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 138 deletions.
2 changes: 1 addition & 1 deletion .env
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ CEPH_SHA=8e86ce7b9675cd070aa1f7f675fb688f1f862e13
CEPH_DEVEL_MGR_PATH=../ceph

# Atom
ATOM_SHA=3c0b7531fd1022d97d5600a8ead51992e2a40ec0
ATOM_SHA=7522d1bf5c3e1484965c0f5b8316c13bca463065

# Demo settings
RBD_POOL=rbd
Expand Down
21 changes: 5 additions & 16 deletions .github/workflows/build-container.yml
Original file line number Diff line number Diff line change
Expand Up @@ -845,34 +845,23 @@ jobs:
atom:
needs: [build, build-ceph]
if: github.repository == 'ceph/ceph-nvmeof'
runs-on: ibmcloud-1
# if: github.repository == 'ceph/ceph-nvmeof'
runs-on: atomRunner
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Atom env initialization
run: |
. .env
ACTION_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
./tests/atom/atomEnvInit.sh $ATOM_SHA $ACTION_URL
- name: Download container images
uses: actions/download-artifact@v4
with:
pattern: container_images_nvmeof
merge-multiple: true

- name: Load container images
run: |
docker load < nvmeof.tar
docker load < nvmeof-cli.tar
- name: Cluster build and Atom tests run
if: always() || failure()
run: |
. .env
./tests/atom/clusterBuildTestsRun.sh $NVMEOF_VERSION $CEPH_SHA $ATOM_SHA
ACTION_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
./tests/atom/clusterBuildTestsRun.sh $NVMEOF_VERSION $CEPH_SHA $ATOM_SHA $ACTION_URL
- name: Atom artifact build
if: always()
Expand All @@ -882,7 +871,7 @@ jobs:
if: always()
with:
name: atom-artifact
path: /tmp/artifact/*
path: /home/cephnvme/artifact.tar.gz

push-images-to-ceph-registry:
if: github.event_name == 'release'
Expand Down
85 changes: 0 additions & 85 deletions tests/atom/atomEnvInit.sh

This file was deleted.

107 changes: 76 additions & 31 deletions tests/atom/clusterBuildTestsRun.sh
Original file line number Diff line number Diff line change
@@ -1,44 +1,89 @@
#!/bin/bash

# if a command fails (returns a non-zero exit code), terminate immediately
# the exit code will be the same as the exit code of the failed command.
# see https://github.com/ceph/ceph-nvmeof/actions/runs/11928539421/job/33246031083
set -e


# Positional arguments:
#   $1 - nvmeof version, used as the tag of the nvmeof and nvmeof-cli images
#   $2 - ceph SHA, or the literal "latest" to resolve one from shaman
#   $3 - atom repo SHA to check out and build
#   $4 - URL of the GitHub action run, recorded in the busy-server marker file
#   $5 - nightly flag (not referenced by the rest of the script visible here)
VERSION=$1
if [ "$2" = "latest" ]; then
  # Pick the sha1 of the ready ceph main build for this machine architecture.
  CEPH_SHA=$(curl -s https://shaman.ceph.com/api/repos/ceph/main/latest/centos/9/ | jq -r ".[] | select(.archs[] == \"$(uname -m)\" and .status == \"ready\") | .sha1")
  # The substitution's status is jq's (no pipefail), so a failed download or
  # an empty match does not trip `set -e`; it just leaves the variable empty.
  # Stop here rather than pulling an image with an empty tag later on.
  if [ -z "$CEPH_SHA" ]; then
    echo "Could not resolve the latest ceph SHA from shaman" >&2
    exit 1
  fi
else
  CEPH_SHA=$2
fi
ATOM_SHA=$3
ACTION_URL=$4
NIGHTLY=$5

# Directory the atom repo is cloned into (name kept as-is; used further down).
RUNNER_FILDER='/home/cephnvme/actions-runner-ceph'

# Check if cluster is busy with another run
# The marker file is created with noclobber so that "is it free?" and
# "claim it" are a single step. Testing with -f and writing afterwards lets
# two jobs both see the server as free and both start.
while true; do
  if ( set -o noclobber; echo "$ACTION_URL" > /home/cephnvme/busyServer.txt ) 2> /dev/null; then
    echo "The server is available for use!"
    chmod +rx /home/cephnvme/busyServer.txt
    break
  elif [ -e "/home/cephnvme/busyServer.txt" ]; then
    echo "The server is busy with another github action job, please wait..."
    sleep 90
  elif [ ! -w /home/cephnvme ]; then
    # Creation failed for a reason other than an existing marker; waiting
    # would never succeed, so fail loudly instead of looping forever.
    echo "Cannot create /home/cephnvme/busyServer.txt" >&2
    exit 1
  else
    # The marker disappeared between the two checks; try to claim it again.
    sleep 1
  fi
done

# Remove previous run data
hostname
# ":?" aborts the script if the variable is ever empty or unset, instead of
# letting the path collapse to "/ceph-nvmeof-atom".
rm -rf "${RUNNER_FILDER:?}/ceph-nvmeof-atom"
# Expand the glob inside a root shell. Without this the "*" is expanded by the
# calling user before sudo runs; if that user cannot list the directory under
# /root, the pattern stays literal and rm -rf silently removes nothing.
sudo sh -c 'rm -rf /root/.ssh/atom_backup/artifact/multiIBMCloudServers_m6/*'
sudo ls -lta /root/.ssh/atom_backup/artifact/multiIBMCloudServers_m6

# Cloning atom repo
# With `set -e` in effect a failed cd stops the script, so the clone never
# lands in an unexpected directory.
cd $RUNNER_FILDER
# NOTE(review): the user@host part of this remote reads "[email protected]",
# which looks like an e-mail obfuscation artifact rather than a real remote.
# Confirm the actual clone URL before relying on this line.
git clone [email protected]:NVME-Over-Fiber/ceph-nvmeof-atom.git

# Switch to given SHA
# ATOM_SHA is the third positional argument of this script.
cd ceph-nvmeof-atom
git checkout $ATOM_SHA

# Build atom images based on the cloned repo
# The image is tagged with the SHA; the docker run further down refers to it
# by the same nvmeof_atom:$ATOM_SHA tag.
docker build -t nvmeof_atom:$ATOM_SHA .

# Atom test script run
# Description of the unclear flags with their default values
# - Upgrade ceph image target (None)
# - Upgrade nvmeof image target (None)
# - Nvmeof cli image use in target (None)
# - Number of gateways (4)
# - Number of gateways to stop (1)
# - Number of gateways after scale down (1)
# - Number of subsystems (2)
# - Number of namespaces (4)
# - Max namespaces per subsystem (1024)
# - HA failover cycles (2)
# - HA failover cycles after upgrade (2)
# - RBD size (200M)
# - Seed number (0)
# - FIO use (1=run fio, 0=don't run fio)
set -x
sudo docker run \
-v /root/.ssh:/root/.ssh \
nvmeof_atom:"$ATOM_SHA" \
python3 cephnvme_atom.py \
quay.ceph.io/ceph-ci/ceph:"$CEPH_SHA" \
quay.io/ceph/nvmeof:"$VERSION" \
quay.io/ceph/nvmeof-cli:"$VERSION" \
None None None None None None 4 1 1 2 4 1024 2 2 200M 0 1 20 20 1 \
--stopNvmeofDaemon \
--stopNvmeofSystemctl \
--stopMonLeader \
--rmNvmeofDaemon \
--gitHubActionDeployment \
--dontUseMTLS \
--skiplbTest \
--journalctlToConsole \
--dontPowerOffCloudVMs noKey noKey \
--multiIBMCloudServers_m2
python3 atom.py \
--project=nvmeof \
--ceph-img=quay.ceph.io/ceph-ci/ceph:"$CEPH_SHA" \
--gw-img=quay.io/ceph/nvmeof:"$VERSION" \
--cli-img=quay.io/ceph/nvmeof-cli:"$VERSION" \
--initiators=1 \
--gw-group-num=1 \
--gw-num=4 \
--gw-to-stop-num=1 \
--gw-scale-down-num=1 \
--subsystem-num=2 \
--ns-num=4 \
--subsystem-max-ns-num=1024 \
--failover-num=2 \
--failover-num-after-upgrade=2 \
--rbd-size=200M \
--fio-devices-num=1 \
--lb-timeout=20 \
--config-dbg-mon=10 \
--config-dbg-ms=1 \
--nvmeof-daemon-stop \
--nvmeof-systemctl-stop \
--mon-leader-stop \
--mon-client-kill \
--nvmeof-daemon-remove \
--redeploy-gws \
--github-action-deployment \
--skip-di-test \
--skip-lb-group-change-test \
--skip-block-list-test \
--skip-ns-rebalancing-test \
--journalctl-to-console \
--dont-power-off-cloud-vms \
--env=m6
set +x
19 changes: 14 additions & 5 deletions tests/atom/cpArtifactAndCleanup.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
#!/bin/bash

sudo rm -rf /tmp/artifact/multiIBMCloudServers_m2
sudo cp -r /root/.ssh/atom_backup/artifact/multiIBMCloudServers_m2 /tmp/artifact
sudo ls -lta /tmp/artifact
sudo chmod -R +rx /tmp/artifact
rm -rf /home/cephnvme/busyServer.txt
# Start from an empty staging directory. It is created first so the archive
# layout does not depend on earlier runs: cp -r into an existing directory
# nests the source under it, while cp -r to a missing path copies the
# source's contents directly.
sudo mkdir -p /home/cephnvme/artifact
sudo rm -rf /home/cephnvme/artifact/*
sudo ls -lta /home/cephnvme/artifact

# Drop the archive left over from a previous run.
sudo rm -rf /home/cephnvme/artifact.tar.gz
sudo ls -lta /home/cephnvme/

# Stage the artifacts from the backup location under /root.
sudo cp -r /root/.ssh/atom_backup/artifact/multiIBMCloudServers_m6 /home/cephnvme/artifact
sudo ls -lta /home/cephnvme/artifact

# Pack the staging directory; -C makes the paths inside the archive relative.
sudo tar -czf /home/cephnvme/artifact.tar.gz -C /home/cephnvme/artifact .
sudo ls -lta /home/cephnvme/artifact
sudo ls -lta /home/cephnvme
sudo chmod +rx /home/cephnvme/artifact.tar.gz
# Release the busy marker that clusterBuildTestsRun.sh waits on.
sudo rm -rf /home/cephnvme/busyServer.txt

0 comments on commit f529009

Please sign in to comment.