Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

squid integration #995

Merged
merged 4 commits into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,13 @@ SPDK_CENTOS_REPO_VER="9.0-21.el9"

# Ceph Cluster
CEPH_CLUSTER_VERSION="${CEPH_VERSION}"
CEPH_BRANCH=main
CEPH_SHA=latest
CEPH_BRANCH=wip-baum-squid-nvmeof-20250105-00
CEPH_SHA=8e86ce7b9675cd070aa1f7f675fb688f1f862e13

CEPH_DEVEL_MGR_PATH=../ceph

# Atom
ATOM_SHA=3c0b7531fd1022d97d5600a8ead51992e2a40ec0
ATOM_SHA=7522d1bf5c3e1484965c0f5b8316c13bca463065

# Demo settings
RBD_POOL=rbd
Expand Down
23 changes: 6 additions & 17 deletions .github/workflows/build-container.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on: # yamllint disable rule:truthy
- '*'
pull_request:
branches:
- devel
- '*'
schedule:
- cron: '0 0 * * *'
workflow_dispatch:
Expand Down Expand Up @@ -845,34 +845,23 @@ jobs:

atom:
needs: [build, build-ceph]
if: github.repository == 'ceph/ceph-nvmeof'
runs-on: ibmcloud-1
# if: github.repository == 'ceph/ceph-nvmeof'
runs-on: atomRunner
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Atom env initialization
run: |
. .env
ACTION_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
./tests/atom/atomEnvInit.sh $ATOM_SHA $ACTION_URL

- name: Download container images
uses: actions/download-artifact@v4
with:
pattern: container_images_nvmeof
merge-multiple: true

- name: Load container images
run: |
docker load < nvmeof.tar
docker load < nvmeof-cli.tar

- name: Cluster build and Atom tests run
if: always() || failure()
run: |
. .env
./tests/atom/clusterBuildTestsRun.sh $NVMEOF_VERSION $CEPH_SHA $ATOM_SHA
ACTION_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
./tests/atom/clusterBuildTestsRun.sh $NVMEOF_VERSION $CEPH_SHA $ATOM_SHA $ACTION_URL

- name: Atom artifact build
if: always()
Expand All @@ -882,7 +871,7 @@ jobs:
if: always()
with:
name: atom-artifact
path: /tmp/artifact/*
path: /home/cephnvme/artifact.tar.gz

push-images-to-ceph-registry:
if: github.event_name == 'release'
Expand Down
85 changes: 0 additions & 85 deletions tests/atom/atomEnvInit.sh

This file was deleted.

107 changes: 76 additions & 31 deletions tests/atom/clusterBuildTestsRun.sh
Original file line number Diff line number Diff line change
@@ -1,44 +1,89 @@
#!/bin/bash

# if a command fails (returns a non-zero exit code), terminate immediately
# the exit code will be the same as the exit code of the failed command.
# see https://github.com/ceph/ceph-nvmeof/actions/runs/11928539421/job/33246031083
set -e


VERSION=$1
if [ "$2" = "latest" ]; then
CEPH_SHA=$(curl -s https://shaman.ceph.com/api/repos/ceph/main/latest/centos/9/ | jq -r ".[] | select(.archs[] == \"$(uname -m)\" and .status == \"ready\") | .sha1")
else
CEPH_SHA=$2
fi
ATOM_SHA=$3
ACTION_URL=$4
NIGHTLY=$5

RUNNER_FILDER='/home/cephnvme/actions-runner-ceph'

# Check if cluster is busy with another run
while true; do
if [ -f "/home/cephnvme/busyServer.txt" ]; then
echo "The server is busy with another github action job, please wait..."
sleep 90
else
echo "The server is available for use!"
echo $ACTION_URL > /home/cephnvme/busyServer.txt
chmod +rx /home/cephnvme/busyServer.txt
break
fi
done

# Remove previous run data
hostname
rm -rf $RUNNER_FILDER/ceph-nvmeof-atom
sudo rm -rf /root/.ssh/atom_backup/artifact/multiIBMCloudServers_m6/*
sudo ls -lta /root/.ssh/atom_backup/artifact/multiIBMCloudServers_m6

# Cloning atom repo
cd $RUNNER_FILDER
git clone git@github.ibm.com:NVME-Over-Fiber/ceph-nvmeof-atom.git

# Switch to given SHA
cd ceph-nvmeof-atom
git checkout $ATOM_SHA

# Build atom images based on the cloned repo
docker build -t nvmeof_atom:$ATOM_SHA .

# Atom test script run
# Description of the unclear flags with their default values
# - Upgrade ceph image target (None)
# - Upgrade nvmeof image target (None)
# - Nvmeof cli image use in target (None)
# - Number of gateways (4)
# - Number of gateways to stop (1)
# - Number of gateways after scale down (1)
# - Number of subsystems (2)
# - Number of namespaces (4)
# - Max namespaces per subsystem (1024)
# - HA failover cycles (2)
# - HA failover cycles after upgrade (2)
# - RBD size (200M)
# - Seed number (0)
# - FIO use (1=run fio, 0=don't run fio)
set -x
sudo docker run \
-v /root/.ssh:/root/.ssh \
nvmeof_atom:"$ATOM_SHA" \
python3 cephnvme_atom.py \
quay.ceph.io/ceph-ci/ceph:"$CEPH_SHA" \
quay.io/ceph/nvmeof:"$VERSION" \
quay.io/ceph/nvmeof-cli:"$VERSION" \
None None None None None None 4 1 1 2 4 1024 2 2 200M 0 1 20 20 1 \
--stopNvmeofDaemon \
--stopNvmeofSystemctl \
--stopMonLeader \
--rmNvmeofDaemon \
--gitHubActionDeployment \
--dontUseMTLS \
--skiplbTest \
--journalctlToConsole \
--dontPowerOffCloudVMs noKey noKey \
--multiIBMCloudServers_m2
python3 atom.py \
--project=nvmeof \
--ceph-img=quay.ceph.io/ceph-ci/ceph:"$CEPH_SHA" \
--gw-img=quay.io/ceph/nvmeof:"$VERSION" \
--cli-img=quay.io/ceph/nvmeof-cli:"$VERSION" \
--initiators=1 \
--gw-group-num=1 \
--gw-num=4 \
--gw-to-stop-num=1 \
--gw-scale-down-num=1 \
--subsystem-num=2 \
--ns-num=4 \
--subsystem-max-ns-num=1024 \
--failover-num=2 \
--failover-num-after-upgrade=2 \
--rbd-size=200M \
--fio-devices-num=1 \
--lb-timeout=20 \
--config-dbg-mon=10 \
--config-dbg-ms=1 \
--nvmeof-daemon-stop \
--nvmeof-systemctl-stop \
--mon-leader-stop \
--mon-client-kill \
--nvmeof-daemon-remove \
--redeploy-gws \
--github-action-deployment \
--skip-di-test \
--skip-lb-group-change-test \
--skip-block-list-test \
--skip-ns-rebalancing-test \
--journalctl-to-console \
--dont-power-off-cloud-vms \
--env=m6
set +x
19 changes: 14 additions & 5 deletions tests/atom/cpArtifactAndCleanup.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
#!/bin/bash

sudo rm -rf /tmp/artifact/multiIBMCloudServers_m2
sudo cp -r /root/.ssh/atom_backup/artifact/multiIBMCloudServers_m2 /tmp/artifact
sudo ls -lta /tmp/artifact
sudo chmod -R +rx /tmp/artifact
rm -rf /home/cephnvme/busyServer.txt
sudo rm -rf /home/cephnvme/artifact/*
sudo ls -lta /home/cephnvme/artifact

sudo rm -rf /home/cephnvme/artifact.tar.gz
sudo ls -lta /home/cephnvme/

sudo cp -r /root/.ssh/atom_backup/artifact/multiIBMCloudServers_m6 /home/cephnvme/artifact
sudo ls -lta /home/cephnvme/artifact

sudo tar -czf /home/cephnvme/artifact.tar.gz -C /home/cephnvme/artifact .
sudo ls -lta /home/cephnvme/artifact
sudo ls -lta /home/cephnvme
sudo chmod +rx /home/cephnvme/artifact.tar.gz
sudo rm -rf /home/cephnvme/busyServer.txt
11 changes: 7 additions & 4 deletions tests/ha/main_exit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,16 @@ background_task() {
docker compose top

echo ℹ️ Send nvme-gw create for all gateways
GW_NAME=''
GW_GROUP=''
i=1 # a single gw index
GW_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | grep -v discovery | awk '$2 ~ /nvmeof/ && $2 ~ /'$i'/ {print $1}')
while [ ! -n "$GW_NAME" ]; do
sleep 1
GW_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | grep -v discovery | awk '$2 ~ /nvmeof/ && $2 ~ /'$i'/ {print $1}')
done
echo 📫 nvme-gw create gateway: \'$GW_NAME\' pool: \'$POOL\', group: \'$GW_GROUP\'
docker compose exec -T ceph ceph nvme-gw create $GW_NAME $POOL "$GW_GROUP"
docker compose exec -T ceph ceph nvme-gw show $POOL "$GW_GROUP"

echo ℹ️ Wait for gateway to be ready
while true; do
Expand All @@ -43,12 +48,10 @@ background_task() {
continue
fi
GW_IP="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW_NAME")"
if docker compose run --rm nvmeof-cli $CLI_TLS_ARGS --server-address $GW_IP --server-port 5500 get_subsystems 2>&1 | grep -i failed; then
if ! docker compose run --rm nvmeof-cli $CLI_TLS_ARGS --server-address $GW_IP --server-port 5500 get_subsystems; then
echo "Container $i $GW_NAME $GW_IP no subsystems. Waiting..."
continue
fi
echo "Container $i $GW_NAME $GW_IP subsystems:"
docker compose run --rm nvmeof-cli $CLI_TLS_ARGS --server-address $GW_IP --server-port 5500 get_subsystems
break
done

Expand Down
11 changes: 6 additions & 5 deletions tests/ha/wait_gateways.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@ if [ $# -ge 1 ]; then
fi
for i in $(seq $SCALE); do
while true; do
sleep 1 # Adjust the sleep duration as needed
GW_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /'$i'/ {print $1}')
GW_NAME=''
while [ ! -n "$GW_NAME" ]; do
sleep 1 # Adjust the sleep duration as needed
GW_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /'$i'/ {print $1}')
done
container_status=$(docker inspect -f '{{.State.Status}}' "$GW_NAME")
if [ "$container_status" = "running" ]; then
echo "Container $i $GW_NAME is now running."
Expand All @@ -25,12 +28,10 @@ for i in $(seq $SCALE); do
continue
fi
GW_IP="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW_NAME")"
if docker compose run --rm nvmeof-cli $CLI_TLS_ARGS --server-address $GW_IP --server-port 5500 get_subsystems 2>&1 | grep -i failed; then
if ! docker compose run --rm nvmeof-cli $CLI_TLS_ARGS --server-address $GW_IP --server-port 5500 get_subsystems; then
echo "Container $i $GW_NAME $GW_IP no subsystems. Waiting..."
continue
fi
echo "Container $i $GW_NAME $GW_IP subsystems:"
docker compose run --rm nvmeof-cli $CLI_TLS_ARGS --server-address $GW_IP --server-port 5500 get_subsystems
break;
done
done
Loading