Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Managed CSC for in-cluster CSM #1674

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 57 additions & 12 deletions asmcli/asmcli

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 14 additions & 8 deletions asmcli/commands/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,20 @@ apply_kube_yamls() {
done
}

install_canonical_controller() {
info "Installing ASM CanonicalService controller in asm-system namespace..."
retry 3 kubectl apply -f "${CANONICAL_CONTROLLER_MANIFEST}"
info "Waiting for deployment..."
retry 3 kubectl wait --for=condition=available --timeout=600s \
#######################################
# Decides how the CanonicalService controller (CSC) is handled for this
# cluster: an existing in-cluster deployment is re-applied from the manifest
# and waited on; otherwise the managed controller's state is checked.
# Globals:   CANONICAL_CONTROLLER_MANIFEST (read)
# Arguments: none
# Outputs:   progress messages via info/warn
#######################################
verify_canonical_controller() {
  local IN_CLUSTER_DEPLOYMENT
  # Best-effort lookup: a failing kubectl is treated the same as "not found".
  IN_CLUSTER_DEPLOYMENT="$(
    kubectl get deployment/canonical-service-controller-manager \
      -n asm-system --ignore-not-found=true || true
  )"

  if [[ -n "${IN_CLUSTER_DEPLOYMENT}" ]]; then
    # An in-cluster controller already exists: keep it in sync with the manifest.
    warn "Kindly migrate to managed canonical service controller. Refer <DocLink to be added>"
    info "Updating ASM CanonicalService controller in asm-system namespace..."
    retry 3 kubectl apply -f "${CANONICAL_CONTROLLER_MANIFEST}"
    info "Waiting for deployment..."
    retry 3 kubectl wait \
      --for=condition=available \
      --timeout=600s \
      deployment/canonical-service-controller-manager \
      -n asm-system
  else
    # No in-cluster controller: only report on the managed one.
    info "Checking Managed CanonicalService controller state..."
    check_managed_canonical_controller_state
  fi

  info "...done!"
}

Expand Down Expand Up @@ -266,7 +274,5 @@ install_control_plane() {
if use_fleet_api; then install_fleet_api; else install_control_plane_revision; fi
fi

if [[ "$DISABLE_CANONICAL_SERVICE" -eq 0 ]] && ! is_managed; then
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to install the managed CSC only when the in-cluster CSC is not present, so we will need to check for the existence of the in-cluster CSC.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see that we are doing this in install_managed_canonical_controller, but we still need to run kubectl apply -f "${CANONICAL_CONTROLLER_MANIFEST}" when the in-cluster CP exists, in case the in-cluster CSC manifest has changed.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved the logic for updating the existing in-cluster CSC Deployment and for checking the managed CSC status into a single routine, "verify_canonical_controller".

install_canonical_controller
fi
verify_canonical_controller
}
7 changes: 3 additions & 4 deletions asmcli/commands/validate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,7 @@ validate_dependencies() {
if ! is_stackdriver_enabled; then
enable_stackdriver_kubernetes
fi
if needs_service_mesh_feature; then
enable_service_mesh_feature
fi
enable_service_mesh_feature
if [[ "${CA}" == "managed_cas" ]]; then
x_wait_for_gke_hub_api_enablement
x_enable_workload_certificate_on_fleet "gkehub.googleapis.com"
Expand All @@ -64,8 +62,9 @@ validate_dependencies() {
exit_if_service_mesh_feature_not_enabled
fi
fi
else
enable_service_mesh_feature
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will also call enable_service_mesh_feature for off-GCP platforms in the asmcli validate command.

Both asmcli install and asmcli validate call validate_dependencies. In the asmcli validate command the user cannot pass --enable* flags, so can_modify_gcp_components returns false and we call exit_if_service_mesh_feature_not_enabled.

Now I think we can move out the logic from is_gcp section and add something like

if can_modify_gcp_components
   enable_service_mesh_feature
else
   exit_if_service_mesh_feature_not_enabled

So we will now also need the extra enable_gcp_components flag for off-GCP platforms.

fi

if can_register_cluster; then
register_cluster
exit_if_cluster_unregistered
Expand Down
40 changes: 40 additions & 0 deletions asmcli/lib/util.sh
Original file line number Diff line number Diff line change
Expand Up @@ -853,3 +853,43 @@ get_monitoring_config_membership_json () {
CONFIG="$(gcloud container hub memberships describe "${MEMBERSHIP_NAME}" --project "${PROJECT_ID}" --format="json(monitoringConfig)")"
echo "${CONFIG}"
}

#######################################
# Polls the fleet's mesh feature description and reports the state of the
# managed Canonical Service Controller (CSC) for this cluster's membership.
#
# The membership entry is the key of `.membershipStates` that ends with
# "/<membership name>". Its `.state.code` decides the outcome:
#   OK       -> success message, stop polling.
#   WARNING  -> stop polling; a warning is issued if one of
#               `.servicemesh.conditions[].code` contains
#               CANONICAL_SERVICE_ERROR, otherwise the state counts as
#               unverified.
#   other    -> print the raw state and retry (up to 10 attempts, 60s apart).
# If no attempt produced a verdict, an "Unable to verify" warning is issued.
#
# Globals:   none written; context options PROJECT_ID, CLUSTER_NAME,
#            CLUSTER_LOCATION and FLEET_ID are read via context_get-option.
# Arguments: none
# Outputs:   status via info/warn; raw state dumps on stdout while retrying
# Returns:   status of the last command; the verdict is reported via messages
#######################################
check_managed_canonical_controller_state() {
  local PROJECT_ID; PROJECT_ID="$(context_get-option "PROJECT_ID")"
  local CLUSTER_NAME; CLUSTER_NAME="$(context_get-option "CLUSTER_NAME")"
  local CLUSTER_LOCATION; CLUSTER_LOCATION="$(context_get-option "CLUSTER_LOCATION")"
  local MEMBERSHIP_NAME; MEMBERSHIP_NAME="$(generate_membership_name "${PROJECT_ID}" "${CLUSTER_LOCATION}" "${CLUSTER_NAME}")"
  local FLEET_ID; FLEET_ID="$(context_get-option "FLEET_ID")"

  local MAX_ATTEMPTS=10
  local RETRY_DELAY_SECONDS=60
  local CS_ERROR="CANONICAL_SERVICE_ERROR"
  local CSC_STATUS_AVAILABLE=0
  local ATTEMPT
  local STATE
  local MEMBERSHIP_STATE
  local CODE
  local CONDITION_CODES

  for (( ATTEMPT = 1; ATTEMPT <= MAX_ATTEMPTS; ATTEMPT++ )); do
    # Describe the feature once per attempt and reuse the result for both the
    # membership lookup and the diagnostic dump. The `|| true` guards are
    # intentional: a failed call should lead to a retry rather than abort a
    # caller that runs with errexit enabled.
    STATE="$(gcloud container fleet mesh describe --project "${FLEET_ID}" --format=json)" || true

    # The membership name is handed to jq as data (--arg) instead of being
    # spliced into the program text, so unusual characters cannot break or
    # alter the filter.
    MEMBERSHIP_STATE="$(jq --arg suffix "/${MEMBERSHIP_NAME}" \
      '.membershipStates | with_entries(select(.key | endswith($suffix)))[]' \
      <<< "${STATE}")" || true
    CODE="$(jq -r '.state.code' <<< "${MEMBERSHIP_STATE}")" || true

    if [[ "${CODE}" == "OK" ]]; then
      info "Managed Canonical Service Controller working successfully"
      CSC_STATUS_AVAILABLE=1
      break
    fi

    if [[ "${CODE}" == "WARNING" ]]; then
      # Collect the condition codes first and match in-shell: piping jq into
      # `grep -q` can report a false negative under pipefail.
      CONDITION_CODES="$(jq -r '.servicemesh.conditions[].code' <<< "${MEMBERSHIP_STATE}")" || true
      if [[ "${CONDITION_CODES}" == *"${CS_ERROR}"* ]]; then
        warn "Managed Canonical Service Controller facing issues. Kindly refer to <wiki link>"
        CSC_STATUS_AVAILABLE=1
      fi
      # A WARNING state is final either way; do not keep polling.
      break
    fi

    printf '%s\n' "STATE: ${STATE}"
    printf '%s\n' "MEMBERSHIP_STATE: ${MEMBERSHIP_STATE}. Retry to get featureState.code for the membership: ${MEMBERSHIP_NAME}"

    # Waiting only makes sense when another attempt follows.
    if (( ATTEMPT < MAX_ATTEMPTS )); then
      sleep "${RETRY_DELAY_SECONDS}"
    fi
  done

  if (( CSC_STATUS_AVAILABLE == 0 )); then
    warn "Unable to verify Managed Canonical Service Controller State. Kindly refer to <wiki link>"
  fi
}

34 changes: 34 additions & 0 deletions asmcli/tests/lib/util.bats
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ setup() {
load '../unit_test_common.bash'
_common_setup
CITADEL_MANIFEST="citadel-ca.yaml"
PROJECT_ID="test-project"
CLUSTER_NAME="test_cluster"
CLUSTER_LOCATION="us-east-2a"
context_init
}

Expand Down Expand Up @@ -189,3 +192,34 @@ EOF

rm "${LOG_FILE_LOCATION}"
}

# Runs check_managed_canonical_controller_state once per FLEET_ID value and
# asserts on the message it prints. The FLEET_ID strings correspond to the
# canned `fleet mesh describe` responses in unit_test_common.bash.
# NOTE(review): the non-OK/non-WARNING paths go through the function's retry
# loop, which calls `sleep 60`; presumably _intercept_setup stubs `sleep` so
# this test stays fast — confirm.
@test "UTIL: Managed Canonical Controller Status is read correctly" {

_intercept_setup
# Membership used for the lookup; the canned responses use "memberships/test-cluster".
run context_set-option "HUB_MEMBERSHIP_ID" "test-cluster"

# No canned response matches this fleet (the stub prints an empty line).
run context_set-option "FLEET_ID" "unknown-state-fleet"
run check_managed_canonical_controller_state
assert_output --partial "Unable to verify Managed Canonical Service Controller State"

# Membership state code is ERROR.
run context_set-option "FLEET_ID" "error-state-fleet"
run check_managed_canonical_controller_state
assert_output --partial "Unable to verify Managed Canonical Service Controller State"

# State code is WARNING, but no condition carries CANONICAL_SERVICE_ERROR.
run context_set-option "FLEET_ID" "warning-non-csc-condition-state-fleet"
run check_managed_canonical_controller_state
assert_output --partial "Unable to verify Managed Canonical Service Controller State"

# Membership state code is OK.
run context_set-option "FLEET_ID" "ok-state-fleet"
run check_managed_canonical_controller_state
assert_output --partial "Managed Canonical Service Controller working successfully"

# State code is WARNING and one condition is CANONICAL_SERVICE_ERROR.
run context_set-option "FLEET_ID" "warning-csc-condition-state-fleet"
run check_managed_canonical_controller_state
assert_output --partial "Managed Canonical Service Controller facing issues"

# Two memberships in the response; only "memberships/test-cluster" (WARNING with
# the CSC condition) should be selected, not "memberships/test-cluster-1" (OK).
run context_set-option "FLEET_ID" "multi-cluster-fleet"
run check_managed_canonical_controller_state
assert_output --partial "Managed Canonical Service Controller facing issues"

}
8 changes: 7 additions & 1 deletion asmcli/tests/run_basic_suite
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,14 @@ main() {
run_build_offline_package "${OUTPUT_DIR}"

run_basic_test "install" "mesh_ca" "--revision_name ${REVISION_LABEL} --offline"; RETVAL=$?;
cleanup_lt_cluster "${LT_NAMESPACE}" "${OUTPUT_DIR}" "${REV}"

echo "Verifying service mesh feature is enabled..."
if ! is_service_mesh_feature_enabled; then
fatal "Service mesh feature is not enabled."
fi

cleanup_lt_cluster "${LT_NAMESPACE}" "${OUTPUT_DIR}" "${REV}"
delete_service_mesh_feature
exit "${RETVAL}"
}

Expand Down
8 changes: 7 additions & 1 deletion asmcli/tests/run_basic_suite_managed
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,14 @@ main() {
parse_args "$@"

run_basic_test "install" "mesh_ca" "--managed --enable-registration"; RETVAL=$?;
cleanup_lt_cluster "${LT_NAMESPACE}" "${OUTPUT_DIR}" "${REV}"

echo "Verifying service mesh feature is enabled..."
if ! is_service_mesh_feature_enabled; then
fatal "Service mesh feature is not enabled."
fi

cleanup_lt_cluster "${LT_NAMESPACE}" "${OUTPUT_DIR}" "${REV}"
delete_service_mesh_feature
exit "${RETVAL}"
}

Expand Down
106 changes: 106 additions & 0 deletions asmcli/tests/unit_test_common.bash
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,112 @@ EOF
return 0
fi

if [[ "${*}" == *"fleet mesh describe"* ]]; then
if [[ "${*}" == *"error-state-fleet"* ]]; then
cat <<EOF
{
"membershipStates": {
"memberships/test-cluster": {
"servicemesh":"",
"state": {
"code": "ERROR"
}
}
}
}
EOF
elif [[ "${*}" == *"ok-state-fleet"* ]]; then
cat <<EOF
{
"membershipStates": {
"memberships/test-cluster": {
"servicemesh":"",
"state": {
"code": "OK"
}
}
}
}
EOF
elif [[ "${*}" == *"warning-non-csc-condition-state-fleet"* ]]; then
cat <<EOF
{
"membershipStates": {
"memberships/test-cluster": {
"servicemesh": {
"conditions": [
{
"code": "CONTROL_PLANE_ISSUE",
"details": "Non CSC Error",
"severity": "WARNING"
}
]
},
"state": {
"code": "WARNING"
}
}
}
}
EOF
elif [[ "${*}" == *"warning-csc-condition-state-fleet"* ]]; then
cat <<EOF
{
"membershipStates": {
"memberships/test-cluster": {
"servicemesh": {
"conditions": [
{
"code": "CONTROL_PLANE_ISSUE",
"details": "Non CSC Error",
"severity": "WARNING"
},
{
"code": "CANONICAL_SERVICE_ERROR",
"details": "CSC Error",
"severity": "WARNING"
}
]
},
"state": {
"code": "WARNING"
}
}
}
}
EOF
elif [[ "${*}" == *"multi-cluster-fleet"* ]]; then
cat <<EOF
{
"membershipStates": {
"memberships/test-cluster-1": {
"servicemesh": "",
"state": {
"code": "OK"
}
},
"memberships/test-cluster": {
"servicemesh": {
"conditions": [
{
"code": "CANONICAL_SERVICE_ERROR",
"details": "CSC Error",
"severity": "WARNING"
}
]
},
"state": {
"code": "WARNING"
}
}
}
}
EOF
else
echo ""
fi
return 0
fi
return 1
}

Expand Down