From a739659e386858ae79b5b1b3665797380edb51c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Mac=C3=ADk?= Date: Fri, 26 Apr 2024 13:45:32 +0200 Subject: [PATCH] feat(load-test): Use AWS s3 for tekton results in dev mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pavel Macík --- cmd/loadTests.go | 52 +---- .../load-tests/ci-scripts/collect-results.sh | 2 +- tests/load-tests/ci-scripts/load-test.sh | 4 +- .../max-concurrency/cluster_read_config.yaml | 22 +++ .../max-concurrency/collect-results.sh | 180 ++++++++++++++++-- tests/load-tests/ci-scripts/setup-cluster.sh | 18 +- .../ci-scripts/setup-tekton-results-s3.sh | 34 ++++ .../ci-scripts/utility_scripts/runs-to-csv.sh | 18 +- tests/load-tests/cluster_read_config.yaml | 10 + tests/load-tests/run-max-concurrency.sh | 65 ++++--- tests/load-tests/run.sh | 52 ++--- 11 files changed, 334 insertions(+), 123 deletions(-) create mode 100755 tests/load-tests/ci-scripts/setup-tekton-results-s3.sh diff --git a/cmd/loadTests.go b/cmd/loadTests.go index 3845adae01..f6b69c56b7 100644 --- a/cmd/loadTests.go +++ b/cmd/loadTests.go @@ -219,7 +219,7 @@ func (u *UserAppsCompsMap) GetIntegrationTestScenarios(userName, appName string) var ( componentRepoUrl string = "https://github.com/devfile-samples/devfile-sample-code-with-quarkus" - componentDockerFilePath string = "src/main/docker/Dockerfile.jvm.staged" + componentDockerfilePath string = "src/main/docker/Dockerfile.jvm.staged" componentsCount int = 1 usernamePrefix string = "testuser" numberOfUsers int @@ -248,7 +248,6 @@ var ( UserCreationTimeMaxPerThread []time.Duration ApplicationCreationTimeMaxPerThread []time.Duration ItsCreationTimeMaxPerThread []time.Duration - CDQCreationTimeMaxPerThread []time.Duration ComponentCreationTimeMaxPerThread []time.Duration PipelineRunSucceededTimeMaxPerThread []time.Duration @@ -258,7 +257,6 @@ var ( UserCreationTimeSumPerThread []time.Duration ApplicationCreationTimeSumPerThread 
[]time.Duration ItsCreationTimeSumPerThread []time.Duration - CDQCreationTimeSumPerThread []time.Duration ComponentCreationTimeSumPerThread []time.Duration PipelineRunSucceededTimeSumPerThread []time.Duration PipelineRunFailedTimeSumPerThread []time.Duration @@ -272,7 +270,6 @@ var ( SuccessfulUserCreationsPerThread []int64 SuccessfulApplicationCreationsPerThread []int64 SuccessfulItsCreationsPerThread []int64 - SuccessfulCDQCreationsPerThread []int64 SuccessfulComponentCreationsPerThread []int64 SuccessfulPipelineRunsPerThread []int64 SuccessfulDeploymentsPerThread []int64 @@ -282,7 +279,6 @@ var ( FailedUserCreationsPerThread []int64 FailedApplicationCreationsPerThread []int64 FailedItsCreationsPerThread []int64 - FailedCDQCreationsPerThread []int64 FailedComponentCreationsPerThread []int64 FailedPipelineRunsPerThread []int64 FailedDeploymentsPerThread []int64 @@ -322,6 +318,7 @@ type LogData struct { MachineName string `json:"machineName"` BinaryDetails string `json:"binaryDetails"` ComponentRepoUrl string `json:"componentRepoUrl"` + ComponentDockerfilePath string `json:"componentDockerfilePath"` ComponentsCount int `json:"componentsCount"` NumberOfThreads int `json:"threads"` NumberOfUsersPerThread int `json:"usersPerThread"` @@ -335,8 +332,6 @@ type LogData struct { MaxTimeToCreateApplications float64 `json:"createApplicationsTimeMax"` AverageTimeToCreateIts float64 `json:"createItsTimeAvg"` MaxTimeToCreateIts float64 `json:"createItsTimeMax"` - AverageTimeToCreateCDQs float64 `json:"createCDQsTimeAvg"` - MaxTimeToCreateCDQs float64 `json:"createCDQsTimeMax"` AverageTimeToCreateComponents float64 `json:"createComponentsTimeAvg"` MaxTimeToCreateComponents float64 `json:"createComponentsTimeMax"` AverageTimeToRunPipelineSucceeded float64 `json:"runPipelineSucceededTimeAvg"` @@ -361,9 +356,6 @@ type LogData struct { ItsCreationSuccessCount int64 `json:"createItsSuccesses"` ItsCreationFailureCount int64 `json:"createItsFailures"` ItsCreationFailureRate float64 
`json:"createItsFailureRate"` - CDQCreationSuccessCount int64 `json:"createCDQsSuccesses"` - CDQCreationFailureCount int64 `json:"createCDQsFailures"` - CDQCreationFailureRate float64 `json:"createCDQsFailureRate"` ComponentCreationSuccessCount int64 `json:"createComponentsSuccesses"` ComponentCreationFailureCount int64 `json:"createComponentsFailures"` ComponentCreationFailureRate float64 `json:"createComponentsFailureRate"` @@ -395,7 +387,6 @@ type JourneyContext struct { AppStudioUsersBar *uiprogress.Bar ApplicationsBar *uiprogress.Bar ItsBar *uiprogress.Bar - CDQsBar *uiprogress.Bar ComponentsBar *uiprogress.Bar PipelinesBar *uiprogress.Bar IntegrationTestsPipelinesBar *uiprogress.Bar @@ -436,7 +427,6 @@ var rootCmd = &cobra.Command{ var AppStudioUsersBar *uiprogress.Bar var ApplicationsBar *uiprogress.Bar var itsBar *uiprogress.Bar -var CDQsBar *uiprogress.Bar var ComponentsBar *uiprogress.Bar var PipelinesBar *uiprogress.Bar var IntegrationTestsPipelinesBar *uiprogress.Bar @@ -451,6 +441,7 @@ func ExecuteLoadTest() { func init() { rootCmd.Flags().StringVar(&componentRepoUrl, "component-repo", componentRepoUrl, "the component repo URL to be used") + rootCmd.Flags().StringVar(&componentDockerfilePath, "component-dockerfile-path", componentDockerfilePath, "the path to Dockerfile within component repo") rootCmd.Flags().IntVar(&componentsCount, "components-count", componentsCount, "number of components to create per application") rootCmd.Flags().StringVar(&usernamePrefix, "username", usernamePrefix, "the prefix used for usersignup names") rootCmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "if 'debug' traces should be displayed in the console") @@ -590,6 +581,7 @@ func setup(cmd *cobra.Command, args []string) { MachineName: machineName, BinaryDetails: binaryDetails, ComponentRepoUrl: componentRepoUrl, + ComponentDockerfilePath: componentDockerfilePath, ComponentsCount: componentsCount, NumberOfThreads: threadCount, NumberOfUsersPerThread: numberOfUsers, @@ 
-623,11 +615,6 @@ func setup(cmd *cobra.Command, args []string) { }) itsBar = itsProgress - cdqProgress := uip.AddBar(overallCount).AppendCompleted().PrependFunc(func(b *uiprogress.Bar) string { - return strutil.PadLeft(fmt.Sprintf("Creating AppStudio CDQs (%d/%d) [%d failed]", b.Current(), overallCount, sumFromArray(FailedCDQCreationsPerThread)), barLength, ' ') - }) - CDQsBar = cdqProgress - componentProgress := uip.AddBar(overallCount).AppendCompleted().PrependFunc(func(b *uiprogress.Bar) string { return strutil.PadLeft(fmt.Sprintf("Creating AppStudio Components (%d/%d) [%d failed]", b.Current(), overallCount, sumFromArray(FailedComponentCreationsPerThread)), barLength, ' ') }) @@ -660,7 +647,6 @@ func setup(cmd *cobra.Command, args []string) { UserCreationTimeMaxPerThread = make([]time.Duration, threadCount) ApplicationCreationTimeMaxPerThread = make([]time.Duration, threadCount) ItsCreationTimeMaxPerThread = make([]time.Duration, threadCount) - CDQCreationTimeMaxPerThread = make([]time.Duration, threadCount) ComponentCreationTimeMaxPerThread = make([]time.Duration, threadCount) PipelineRunSucceededTimeMaxPerThread = make([]time.Duration, threadCount) @@ -670,7 +656,6 @@ func setup(cmd *cobra.Command, args []string) { UserCreationTimeSumPerThread = make([]time.Duration, threadCount) ApplicationCreationTimeSumPerThread = make([]time.Duration, threadCount) ItsCreationTimeSumPerThread = make([]time.Duration, threadCount) - CDQCreationTimeSumPerThread = make([]time.Duration, threadCount) ComponentCreationTimeSumPerThread = make([]time.Duration, threadCount) PipelineRunSucceededTimeSumPerThread = make([]time.Duration, threadCount) PipelineRunFailedTimeSumPerThread = make([]time.Duration, threadCount) @@ -685,7 +670,6 @@ func setup(cmd *cobra.Command, args []string) { SuccessfulUserCreationsPerThread = make([]int64, threadCount) SuccessfulApplicationCreationsPerThread = make([]int64, threadCount) SuccessfulItsCreationsPerThread = make([]int64, threadCount) - 
SuccessfulCDQCreationsPerThread = make([]int64, threadCount) SuccessfulComponentCreationsPerThread = make([]int64, threadCount) SuccessfulPipelineRunsPerThread = make([]int64, threadCount) SuccessfulPVCCreationsPerThread = make([]int64, threadCount) @@ -696,7 +680,6 @@ func setup(cmd *cobra.Command, args []string) { FailedUserCreationsPerThread = make([]int64, threadCount) FailedApplicationCreationsPerThread = make([]int64, threadCount) FailedItsCreationsPerThread = make([]int64, threadCount) - FailedCDQCreationsPerThread = make([]int64, threadCount) FailedComponentCreationsPerThread = make([]int64, threadCount) FailedPipelineRunsPerThread = make([]int64, threadCount) @@ -718,7 +701,6 @@ func setup(cmd *cobra.Command, args []string) { AppStudioUsersBar: AppStudioUsersBar, ApplicationsBar: ApplicationsBar, ItsBar: itsBar, - CDQsBar: CDQsBar, ComponentsBar: ComponentsBar, PipelinesBar: PipelinesBar, IntegrationTestsPipelinesBar: IntegrationTestsPipelinesBar, @@ -804,24 +786,6 @@ func setup(cmd *cobra.Command, args []string) { itsCreationFailureRate := float64(itsCreationFailureCount) / float64(overallCount) logData.ItsCreationFailureRate = itsCreationFailureRate - // Compiling data about CDQs - cdqCreationSuccessCount := sumFromArray(SuccessfulCDQCreationsPerThread) - logData.CDQCreationSuccessCount = cdqCreationSuccessCount - - cdqCreationFailureCount := sumFromArray(FailedCDQCreationsPerThread) - logData.CDQCreationFailureCount = cdqCreationFailureCount - - averageTimeToCreateCDQs := float64(0) - if cdqCreationSuccessCount > 0 { - averageTimeToCreateCDQs = sumDurationFromArray(CDQCreationTimeSumPerThread).Seconds() / float64(cdqCreationSuccessCount) - } - logData.AverageTimeToCreateCDQs = averageTimeToCreateCDQs - - logData.MaxTimeToCreateCDQs = maxDurationFromArray(CDQCreationTimeMaxPerThread).Seconds() - - cdqCreationFailureRate := float64(cdqCreationFailureCount) / float64(overallCount) - logData.CDQCreationFailureRate = cdqCreationFailureRate - // Compiling 
data about Components componentCreationSuccessCount := sumFromArray(SuccessfulComponentCreationsPerThread) logData.ComponentCreationSuccessCount = componentCreationSuccessCount @@ -831,7 +795,7 @@ func setup(cmd *cobra.Command, args []string) { averageTimeToCreateComponents := float64(0) if componentCreationSuccessCount > 0 { - averageTimeToCreateComponents = sumDurationFromArray(ComponentCreationTimeSumPerThread).Seconds() / float64(cdqCreationSuccessCount) + averageTimeToCreateComponents = sumDurationFromArray(ComponentCreationTimeSumPerThread).Seconds() / float64(componentCreationSuccessCount) } logData.AverageTimeToCreateComponents = averageTimeToCreateComponents @@ -912,7 +876,7 @@ func setup(cmd *cobra.Command, args []string) { deploymentFailureRate := float64(deploymentFailureCount) / float64(overallCount*componentsCount) logData.DeploymentFailureRate = deploymentFailureRate - workloadKPI := logData.AverageTimeToCreateApplications + logData.AverageTimeToCreateCDQs + logData.AverageTimeToCreateComponents + logData.AverageTimeToRunPipelineSucceeded + logData.AverageTimeToDeploymentSucceeded + workloadKPI := logData.AverageTimeToCreateApplications + logData.AverageTimeToCreateComponents + logData.AverageTimeToRunPipelineSucceeded + logData.AverageTimeToDeploymentSucceeded logData.WorkloadKPI = workloadKPI if stage { StageCleanup(journeyContexts) @@ -926,7 +890,6 @@ func setup(cmd *cobra.Command, args []string) { klog.Infof("Avg/max time to spin up users: %.2f s/%.2f s", averageTimeToSpinUpUsers, logData.MaxTimeToSpinUpUsers) klog.Infof("Avg/max time to create application: %.2f s/%.2f s", averageTimeToCreateApplications, logData.MaxTimeToCreateApplications) klog.Infof("Avg/max time to create integration test: %.2f s/%.2f s", averageTimeToCreateIts, logData.MaxTimeToCreateIts) - klog.Infof("Avg/max time to create cdq: %.2f s/%.2f s", averageTimeToCreateCDQs, logData.MaxTimeToCreateCDQs) klog.Infof("Avg/max time to create component: %.2f s/%.2f s", 
averageTimeToCreateComponents, logData.MaxTimeToCreateComponents) klog.Infof("Avg/max time to complete pipelinesrun: %.2f s/%.2f s", averageTimeToRunPipelineSucceeded, logData.MaxTimeToRunPipelineSucceeded) klog.Infof("Avg/max time to complete integration test: %.2f s/%.2f s", IntegrationTestsAverageTimeToRunPipelineSucceeded, logData.IntegrationTestsMaxTimeToRunPipelineSucceeded) @@ -943,7 +906,6 @@ func setup(cmd *cobra.Command, args []string) { klog.Infof("Number of times application creation worked/failed: %d/%d (%.2f %%)", applicationCreationSuccessCount, applicationCreationFailureCount, applicationCreationFailureRate*100) klog.Infof("Number of times integration tests creation worked/failed: %d/%d (%.2f %%)", itsCreationSuccessCount, itsCreationFailureCount, itsCreationFailureRate*100) - klog.Infof("Number of times cdq creation worked/failed: %d/%d (%.2f %%)", cdqCreationSuccessCount, cdqCreationFailureCount, cdqCreationFailureRate*100) klog.Infof("Number of times component creation worked/failed: %d/%d (%.2f %%)", componentCreationSuccessCount, componentCreationFailureCount, componentCreationFailureRate*100) klog.Infof("Number of times pipeline run worked/failed: %d/%d (%.2f %%)", pipelineRunSuccessCount, pipelineRunFailureCount, pipelineRunFailureRate*100) klog.Infof("Number of times integration tests' pipeline run worked/failed: %d/%d (%.2f %%)", integrationTestsPipelineRunSuccessCount, integrationTestsPipelineRunFailureCount, IntegrationTestsPipelineRunFailureRate*100) @@ -1478,7 +1440,7 @@ func (h *ConcreteHandlerResources) handleComponentCreation(ctx *JourneyContext, GitSource: &appstudioApi.GitSource{ URL: componentRepoUrl, Revision: "", - DockerfileURL: componentDockerFilePath, + DockerfileURL: componentDockerfilePath, }, }, }, diff --git a/tests/load-tests/ci-scripts/collect-results.sh b/tests/load-tests/ci-scripts/collect-results.sh index f335389521..8b40ac0891 100755 --- a/tests/load-tests/ci-scripts/collect-results.sh +++ 
b/tests/load-tests/ci-scripts/collect-results.sh @@ -15,7 +15,7 @@ source "./tests/load-tests/ci-scripts/user-prefix.sh" echo "Collecting load test results" load_test_log=$ARTIFACT_DIR/load-tests.log -find "$output_dir" -type f -name '*.log' -exec cp -vf {} "${ARTIFACT_DIR}" \; +find "$output_dir" -type f \( -name '*.log' -o -name '*.logs' \) -exec cp -vf {} "${ARTIFACT_DIR}" \; find "$output_dir" -type f -name 'load-tests.json' -exec cp -vf {} "${ARTIFACT_DIR}" \; find "$output_dir" -type f -name 'gh-rate-limits-remaining.csv' -exec cp -vf {} "${ARTIFACT_DIR}" \; find "$output_dir" -type f -name '*.pprof' -exec cp -vf {} "${ARTIFACT_DIR}" \; diff --git a/tests/load-tests/ci-scripts/load-test.sh b/tests/load-tests/ci-scripts/load-test.sh index c37e856a6b..3447f4a24f 100755 --- a/tests/load-tests/ci-scripts/load-test.sh +++ b/tests/load-tests/ci-scripts/load-test.sh @@ -11,7 +11,7 @@ pushd "${2:-./tests/load-tests}" source "./ci-scripts/user-prefix.sh" -export QUAY_E2E_ORGANIZATION MY_GITHUB_ORG GITHUB_TOKEN TEKTON_PERF_ENABLE_PROFILING TEKTON_PERF_ENABLE_CPU_PROFILING TEKTON_PERF_ENABLE_MEMORY_PROFILING TEKTON_PERF_PROFILE_CPU_PERIOD KUBE_SCHEDULER_LOG_LEVEL +export QUAY_E2E_ORGANIZATION MY_GITHUB_ORG GITHUB_USER GITHUB_TOKEN TEKTON_PERF_ENABLE_PROFILING TEKTON_PERF_ENABLE_CPU_PROFILING TEKTON_PERF_ENABLE_MEMORY_PROFILING TEKTON_PERF_PROFILE_CPU_PERIOD KUBE_SCHEDULER_LOG_LEVEL QUAY_E2E_ORGANIZATION=$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/quay-org) MY_GITHUB_ORG=$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/github-org) @@ -26,6 +26,8 @@ for kv in "${kvs[@]}"; do done echo >>"$rate_limits_csv" +echo -e "[INFO] Start tests with user: ${GITHUB_USER}" + while true; do timestamp=$(printf "%s" "$(date -u +'%FT%T')") echo -n "$timestamp" >>"$rate_limits_csv" diff --git a/tests/load-tests/ci-scripts/max-concurrency/cluster_read_config.yaml b/tests/load-tests/ci-scripts/max-concurrency/cluster_read_config.yaml index 2afa8af153..cf9bfb411a 100644 ---
a/tests/load-tests/ci-scripts/max-concurrency/cluster_read_config.yaml +++ b/tests/load-tests/ci-scripts/max-concurrency/cluster_read_config.yaml @@ -45,4 +45,26 @@ {{ monitor_pod('tekton-results', 'tekton-results-watcher', 20, '-.*') }} {{ monitor_pod_container('tekton-results', 'tekton-results-watcher', 'watcher', 20, '-.*') }} +{{ monitor_pod('tekton-results', 'tekton-results-api', 20, '-.*') }} +{{ monitor_pod_container('tekton-results', 'tekton-results-api', 'api', 20, '-.*') }} {{ pv_stats('tekton-results', 'data-postgres-postgresql-0', 20) }} + +- name: measurements.tekton-results-watcher.watcher_workqueue_depth + monitoring_query: sum(watcher_workqueue_depth{job="tekton-results-watcher"}) + monitoring_step: 20 + +- name: measurements.tekton-results-watcher.watcher_reconcile_latency_bucket + monitoring_query: histogram_quantile(0.99, sum(rate(watcher_reconcile_latency_bucket{job="tekton-results-watcher"}[30m])) by (le) ) / 1000 + monitoring_step: 20 + +- name: measurements.cluster_cpu_usage_seconds_total_rate + monitoring_query: sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=""}) + monitoring_step: 20 + +- name: measurements.cluster_memory_usage_rss_total + monitoring_query: sum(container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", cluster="", container!=""}) + monitoring_step: 20 + +- name: measurements.cluster_disk_throughput_total + monitoring_query: sum (rate(container_fs_reads_bytes_total{id!="", device=~"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+", cluster=""}[5m]) + rate(container_fs_writes_bytes_total{id!="", device=~"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+", cluster=""}[5m])) + monitoring_step: 20 diff --git a/tests/load-tests/ci-scripts/max-concurrency/collect-results.sh b/tests/load-tests/ci-scripts/max-concurrency/collect-results.sh index ef3528570a..2bf86ee4e9 100755 --- a/tests/load-tests/ci-scripts/max-concurrency/collect-results.sh +++ 
b/tests/load-tests/ci-scripts/max-concurrency/collect-results.sh @@ -15,13 +15,17 @@ csv_delim=";" csv_delim_quoted="\"$csv_delim\"" dt_format='"%Y-%m-%dT%H:%M:%SZ"' +artifact_logs="${ARTIFACT_DIR}/logs" +artifact_pprof="${ARTIFACT_DIR}/pprof" + collect_artifacts() { echo "Collecting load test artifacts.." - mkdir -p "${ARTIFACT_DIR}/logs" - find "$output_dir" -type f -name 'load-tests.max-concurrency.*.log' -exec cp -vf {} "${ARTIFACT_DIR}/logs" \; - find "$output_dir" -type f -name 'load-tests.max-concurrency.json' -exec cp -vf {} "${ARTIFACT_DIR}" \; - mkdir -p "${ARTIFACT_DIR}/pprof" - find "$output_dir" -type f -name '*.pprof' -exec cp -vf {} "${ARTIFACT_DIR}/pprof" \; + mkdir -p "$artifact_logs" + find "$output_dir" -type f -name 'load-tests.max-concurrency.*.log' -exec cp -vf {} "$artifact_logs" \; + find "$output_dir" -type f -name 'load-tests.max-concurrency.json' -exec cp -vf {} "$artifact_logs" \; + find "$output_dir" -type f -name 'tekton-results-*.logs' -exec cp -vf {} "$artifact_logs" \; + mkdir -p "$artifact_pprof" + find "$output_dir" -type f -name '*.pprof' -exec cp -vf {} "$artifact_pprof" \; } collect_monitoring_data() { @@ -56,10 +60,25 @@ collect_monitoring_data() { -d &>"$monitoring_collection_log" cp -f "$monitoring_collection_data" "$ARTIFACT_DIR" + mkdir -p "$artifact_logs/ggm" + for file in $(find "$monitoring_collection_dir/" -maxdepth 1 -name "*.csv"); do + echo "Converting $file" + out="$artifact_logs/ggm/$(basename "$file")" + rm -rf "$out" + while read line; do + timestamp=$(echo "$line" | cut -d "," -f1) + value=$(echo "$line" | cut -d "," -f2) + echo "$(date -d "@$timestamp" "+%Y-%m-%dT%H:%M:%S.%N" --utc);$value" >>"$out" + done <<<"$(tail -n +2 "$file")" & + done + wait + ## Monitoring data per iteration for monitoring_collection_data in $(find "$output_dir" -type f -name 'load-tests.max-concurrency.*.json'); do iteration_index=$(echo "$monitoring_collection_data" | sed -e 
's,.*/load-tests.max-concurrency.\([0-9]\+-[0-9]\+\).json,\1,') monitoring_collection_log="$ARTIFACT_DIR/monitoring-collection.$iteration_index.log" + monitoring_collection_dir="$ARTIFACT_DIR/monitoring-collection-raw-data-dir/$iteration_index" + mkdir -p "$monitoring_collection_dir" echo "Collecting monitoring data for step $iteration_index..." mstart=$(date --utc --date "$(status_data.py --status-data-file "$monitoring_collection_data" --get timestamp)" --iso-8601=seconds) mend=$(date --utc --date "$(status_data.py --status-data-file "$monitoring_collection_data" --get endTimestamp)" --iso-8601=seconds) @@ -69,6 +88,7 @@ collect_monitoring_data() { --additional ./tests/load-tests/cluster_read_config.yaml \ --monitoring-start "$mstart" \ --monitoring-end "$mend" \ + --monitoring-raw-data-dir "$monitoring_collection_dir" \ --prometheus-host "https://$mhost" \ --prometheus-port 443 \ --prometheus-token "$(oc whoami -t)" \ @@ -87,8 +107,8 @@ collect_tekton_profiling_data() { for pprof_profile in $(find "$output_dir" -name "*.pprof"); do if [ -s "$pprof_profile" ]; then file=$(basename "$pprof_profile") - go tool pprof -text "$pprof_profile" >"$ARTIFACT_DIR/pprof/$file.txt" || true - go tool pprof -svg -output="$ARTIFACT_DIR/pprof/$file.svg" "$pprof_profile" || true + go tool pprof -text "$pprof_profile" >"$artifact_pprof/$file.txt" || true + go tool pprof -svg -output="$artifact_pprof/$file.svg" "$pprof_profile" || true fi done fi @@ -112,12 +132,11 @@ collect_scalability_data() { ${csv_delim}Threads\ ${csv_delim}WorkloadKPI\ ${csv_delim}Errors\ +${csv_delim}Duration\ ${csv_delim}UserAvgTime\ ${csv_delim}UserMaxTime\ ${csv_delim}ApplicationAvgTime\ ${csv_delim}ApplicationMaxTime\ -${csv_delim}CDQAvgTime\ -${csv_delim}CDQMaxTime\ ${csv_delim}ComponentsAvgTime\ ${csv_delim}ComponentsMaxTime\ ${csv_delim}PipelineRunAvgTime\ @@ -136,6 +155,15 @@ ${csv_delim}ClusterPVCInUseAvg\ ${csv_delim}TektonResultsWatcherMemoryMin\ ${csv_delim}TektonResultsWatcherMemoryMax\ 
${csv_delim}TektonResultsWatcherMemoryRange\ +${csv_delim}TektonResultsWatcherCPUMin\ +${csv_delim}TektonResultsWatcherCPUMax\ +${csv_delim}TektonResultsWatcherCPURange\ +${csv_delim}TektonResultsWatcherWorkqueueDepthMin\ +${csv_delim}TektonResultsWatcherWorkqueueDepthMax\ +${csv_delim}TektonResultsWatcherWorkqueueDepthRange\ +${csv_delim}TektonResultsWatcherReconcileLatencyBucketMin\ +${csv_delim}TektonResultsWatcherReconcileLatencyBucketMax\ +${csv_delim}TektonResultsWatcherReconcileLatencyBucketRange\ ${tekton_results_watcher_pod_headers}\ ${csv_delim}SchedulerPendingPodsCountAvg\ ${csv_delim}TokenPoolRatePrimaryAvg\ @@ -171,12 +199,11 @@ ${csv_delim}NodeDiskIoTimeSecondsTotalAvg" \ + $csv_delim_quoted + (.threads | tostring) \ + $csv_delim_quoted + (.workloadKPI | tostring) \ + $csv_delim_quoted + (.errorsTotal | tostring) \ + + $csv_delim_quoted + ((.endTimestamp | strptime(\"%Y-%m-%dT%H:%M:%S%z\") | mktime) - (.timestamp | strptime(\"%Y-%m-%dT%H:%M:%S%z\") | mktime) | tostring) \ + $csv_delim_quoted + (.createUserTimeAvg | tostring) \ + $csv_delim_quoted + (.createUserTimeMax | tostring) \ + $csv_delim_quoted + (.createApplicationsTimeAvg | tostring) \ + $csv_delim_quoted + (.createApplicationsTimeMax | tostring) \ - + $csv_delim_quoted + (.createCDQsTimeAvg | tostring) \ - + $csv_delim_quoted + (.createCDQsTimeMax | tostring) \ + $csv_delim_quoted + (.createComponentsTimeAvg | tostring) \ + $csv_delim_quoted + (.createComponentsTimeMax | tostring) \ + $csv_delim_quoted + (.runPipelineSucceededTimeAvg | tostring) \ @@ -195,6 +222,15 @@ ${csv_delim}NodeDiskIoTimeSecondsTotalAvg" \ + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".\"container[watcher]\".memory.min | tostring) \ + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".\"container[watcher]\".memory.max | tostring) \ + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".\"container[watcher]\".memory.range | tostring) \ + + $csv_delim_quoted + 
(.measurements.\"tekton-results-watcher\".cpu.min | tostring) \ + + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".cpu.max | tostring) \ + + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".cpu.range | tostring) \ + + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_workqueue_depth.min | tostring) \ + + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_workqueue_depth.max | tostring) \ + + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_workqueue_depth.range | tostring) \ + + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_reconcile_latency_bucket.min | tostring) \ + + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_reconcile_latency_bucket.max | tostring) \ + + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_reconcile_latency_bucket.range | tostring) \ ${parked_go_routines_columns} \ + $csv_delim_quoted + (.measurements.scheduler_pending_pods_count.mean | tostring) \ + $csv_delim_quoted + (.measurements.token_pool_rate_primary.mean | tostring) \ @@ -249,8 +285,130 @@ collect_timestamp_csvs() { oc get pipelineruns.tekton.dev -A -o json | jq "$jq_cmd" | sed -e "s/\n//g" -e "s/^\"//g" -e "s/\"$//g" -e "s/Z;/;/g" | sort -t ";" -k 13 -r -n >>"$pipelinerun_timestamps" } +jq_iso_8601_to_seconds="( \ + (if \$d | contains(\"m\") and (endswith(\"ms\") | not) then (\$d | capture(\"(?\\\\d+)m(?\\\\d+\\\\.?(\\\\d+)?)s\") | (.minutes | tonumber * 60) + (.seconds | tonumber)) else 0 end) + \ + (if \$d | (contains(\"m\") | not) and contains(\"s\") and (endswith(\"ms\") | not) and (endswith(\"µs\") | not) then (\$d | capture(\"(?\\\\d+\\\\.\\\\d+)s\") | (.seconds | tonumber)) else 0 end) + \ + (if \$d | endswith(\"ms\") then (\$d | split(\"ms\") | .[0] | tonumber / 1000) else 0 end) + \ + (if \$d | endswith(\"µs\") then (\$d | split(\"µs\") | .[0] | tonumber / 1000000) else 0 end) \ +) | tostring" + 
+convert_go_duration_to_seconds() { + local duration=$1 + local total_seconds=0 + + # Extract hours, minutes, seconds, milliseconds, and microseconds + if [[ $duration =~ ([0-9]*\.?[0-9]+)h ]]; then + total_seconds=$(bc <<<"$total_seconds + ${BASH_REMATCH[1]} * 3600") + fi + if [[ $duration =~ ([0-9]*\.?[0-9]+)m ]]; then + total_seconds=$(bc <<<"$total_seconds + ${BASH_REMATCH[1]} * 60") + fi + if [[ $duration =~ ([0-9]*\.?[0-9]+)s ]]; then + total_seconds=$(bc <<<"$total_seconds + ${BASH_REMATCH[1]}") + fi + if [[ $duration =~ ([0-9]*\.?[0-9]+)ms ]]; then + total_seconds=$(bc <<<"$total_seconds + ${BASH_REMATCH[1]} / 1000") + fi + if [[ $duration =~ ([0-9]*\.?[0-9]+)(µs|us) ]]; then + total_seconds=$(bc <<<"$total_seconds + ${BASH_REMATCH[1]} / 1000000") + fi + + echo $total_seconds +} + +collect_tekton_results_logs() { + echo "Collecting Tekton results logs..." + mkdir -p "$artifact_logs" + ggm=$artifact_logs/ggm + mkdir -p "$ggm" + oc logs -c api -n tekton-results -l "app.kubernetes.io/name=tekton-results-api" --prefix --tail=-1 >"$artifact_logs/tekton-results-api.log" + oc logs -c watcher -n tekton-results -l "app.kubernetes.io/name=tekton-results-watcher" --prefix --tail=-1 >"$artifact_logs/tekton-results-watcher.log" + oc logs -c minio -n tekton-results "pod/storage-pool-0-0" --prefix --tail=-1 >"$artifact_logs/tekton-result-storage.log" + ts_format='"%Y-%m-%dT%H:%M:%S"' + + jq_cmd="(.ts | strftime($ts_format)) + (.ts | tostring | capture(\".*(?\\\\.\\\\d+)\") | .milliseconds) \ + + $csv_delim_quoted + ( \ + .msg | capture(\"(?GGM(\\\\d+)?) (?.+) kind (?\\\\S*) ns (?\\\\S*) name (?\\\\S*).* times? 
spent (?.*)\") \ + | .id \ + + $csv_delim_quoted + (.type) \ + + $csv_delim_quoted + (.kind) \ + + $csv_delim_quoted + (.ns) \ + + $csv_delim_quoted + (.name) \ + + $csv_delim_quoted + (.duration) \ + + $csv_delim_quoted + (.duration as \$d | $jq_iso_8601_to_seconds ) \ + )" + component=tekton-results-api + metrics=("UpdateLog after handleReturn" "UpateLog after flush" "GRPC receive" "RBAC check" "get record" "create stream" "read stream") + for f in $(find $artifact_logs -type f -name "$component*.logs"); do + echo "Processing $f..." + grep "\"GGM" "$f" | sed -e 's,.*\({.*}\).*,\1,g' >$f.ggm.json + jq -rc "$jq_cmd" $f.ggm.json >"$f.csv" || true + for metric in "${metrics[@]}"; do + m="$(echo "$metric" | sed -e 's,[ /],_,g')" + grep "$metric"';' "$f.csv" >"$f.$m.csv" + done & + done + wait + for metric in "${metrics[@]}"; do + m="$(echo "$metric" | sed -e 's,[ /],_,g')" + find "$artifact_logs" -name "$component.*.logs.$m.csv" | xargs cat | sort -u >"$ggm/$component.$m.csv" + done + + component=tekton-results-watcher + metrics=("streamLogs" "dynamic Reconcile" "tkn read" "tkn write" "log copy and write" "flush" "close/rcv") + jq_cmd="if .ts | tostring | contains(\"-\") then .ts | capture(\"(?.*)Z\") | .t else (.ts | strftime($ts_format)) + (.ts | tostring | capture(\".*(?\\\\.\\\\d+)\") | .milliseconds) end \ + + ( \ + .msg | capture(\"(?GGM(\\\\d+)?) (?.+)(?\\\\S*) obj ns (?\\\\S*) obj name (?\\\\S*) times? spent (?.*)\") \ + | $csv_delim_quoted + (.id) \ + + $csv_delim_quoted + (.type) \ + + $csv_delim_quoted + (.kind) \ + + $csv_delim_quoted + (.ns) \ + + $csv_delim_quoted + (.name) \ + + $csv_delim_quoted + (.duration) \ + + $csv_delim_quoted + (.duration as \$d | $jq_iso_8601_to_seconds ) \ + )" + for f in $(find $artifact_logs -type f -name "$component*.logs"); do + echo "Processing $f..." 
+ grep "\"GGM" "$f" | sed -e 's,.*\({.*}\).*,\1,g' >$f.ggm.json + jq -rc "$jq_cmd" $f.ggm.json >"$f.csv" || true + for metric in "${metrics[@]}"; do + m="$(echo "$metric" | sed -e 's,[ /],_,g')" + grep "$metric"';' "$f.csv" >"$f.$m.csv" + done & + done + wait + for metric in "${metrics[@]}"; do + m="$(echo "$metric" | sed -e 's,[ /],_,g')" + find "$artifact_logs" -name "$component.*.logs.$m.csv" | xargs cat | sort -u >"$ggm/$component.$m.csv" + done + + log_file=$(find "$output_dir" -name 'tekton-results-api.*.logs' | tail -n1) + + i=16 + output="$artifact_logs/ggm/$(basename "$log_file").ggmggm$i.csv" + echo "Processing $output..." + rm -rvf "$output" + while read line; do + duration_ts=$(echo "$line" | sed -e "s,.* HandleStreams \([^ ]\+\).*ts \(.*\),\2;\1,g") + IFS=";" read -ra tokens <<<"${duration_ts}" + echo "$(date -d @"${tokens[0]}" --utc +"%Y-%m-%dT%H:%M:%S.%N");$(convert_go_duration_to_seconds ${tokens[1]})" >>"$output" + done <<<"$(grep "GGMGGM$i" $log_file)" + + for i in 17 18; do + output="$artifact_logs/ggm/$(basename "$log_file").ggmggm$i.csv" + echo "Processing $output..." + rm -rvf "$output" + while read line; do + duration_ts=$(echo "$line" | sed -e "s,.* time \([^ ]\+\) ts \([^ ]\+\).*,\2;\1,g") + IFS=";" read -ra tokens <<<"${duration_ts}" + echo "$(date -d @"${tokens[0]}" --utc +"%Y-%m-%dT%H:%M:%S.%N");$(convert_go_duration_to_seconds ${tokens[1]})" >>"$output" + done <<<"$(grep "GGMGGM$i" $log_file)" + done +} + echo "Collecting max concurrency results..." 
collect_artifacts || true +collect_tekton_results_logs || true collect_timestamp_csvs || true collect_monitoring_data || true collect_scalability_data || true diff --git a/tests/load-tests/ci-scripts/setup-cluster.sh b/tests/load-tests/ci-scripts/setup-cluster.sh index 40e898a5d3..899a18cb14 100755 --- a/tests/load-tests/ci-scripts/setup-cluster.sh +++ b/tests/load-tests/ci-scripts/setup-cluster.sh @@ -13,7 +13,7 @@ pushd "${2:-.}" echo "Installing app-studio and tweaking cluster configuration" go mod tidy go mod vendor -export MY_GITHUB_ORG QUAY_E2E_ORGANIZATION INFRA_DEPLOYMENTS_ORG INFRA_DEPLOYMENTS_BRANCH TEKTON_PERF_ENABLE_PROFILING TEKTON_PERF_ENABLE_CPU_PROFILING TEKTON_PERF_ENABLE_MEMORY_PROFILING TEKTON_PERF_PROFILE_CPU_PERIOD E2E_PAC_GITHUB_APP_ID E2E_PAC_GITHUB_APP_PRIVATE_KEY ENABLE_SCHEDULING_ON_MASTER_NODES +export MY_GITHUB_ORG GITHUB_USER QUAY_E2E_ORGANIZATION INFRA_DEPLOYMENTS_ORG INFRA_DEPLOYMENTS_BRANCH TEKTON_PERF_ENABLE_PROFILING TEKTON_PERF_ENABLE_CPU_PROFILING TEKTON_PERF_ENABLE_MEMORY_PROFILING TEKTON_PERF_PROFILE_CPU_PERIOD E2E_PAC_GITHUB_APP_ID E2E_PAC_GITHUB_APP_PRIVATE_KEY ENABLE_SCHEDULING_ON_MASTER_NODES TEKTON_RESULTS_S3_BUCKET_NAME MY_GITHUB_ORG=$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/github-org) QUAY_E2E_ORGANIZATION=$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/quay-org) INFRA_DEPLOYMENTS_ORG=${INFRA_DEPLOYMENTS_ORG:-redhat-appstudio} @@ -21,6 +21,7 @@ INFRA_DEPLOYMENTS_BRANCH=${INFRA_DEPLOYMENTS_BRANCH:-main} E2E_PAC_GITHUB_APP_ID="$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/pac-github-app-id)" E2E_PAC_GITHUB_APP_PRIVATE_KEY="$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/pac-github-app-private-key)" ENABLE_SCHEDULING_ON_MASTER_NODES=false +TEKTON_RESULTS_S3_BUCKET_NAME=${TEKTON_RESULTS_S3_BUCKET_NAME:-} ## Tweak infra-deployments if [ "${TWEAK_INFRA_DEPLOYMENTS:-false}" == "true" ]; then @@ -46,6 +47,7 @@ fi ## Install infra-deployments echo "Installing infra-deployments" +echo " GitHub 
user: ${GITHUB_USER}" echo " GitHub org: ${INFRA_DEPLOYMENTS_ORG}" echo " GitHub branch: ${INFRA_DEPLOYMENTS_BRANCH}" make local/cluster/prepare @@ -65,4 +67,18 @@ oc patch -n application-service secret has-github-token -p '{"data": {"token": n oc rollout restart deployment -n application-service oc rollout status deployment -n application-service -w +## Setup tekton-results S3 +if [ -n "$TEKTON_RESULTS_S3_BUCKET_NAME" ]; then + echo "Setting up Tekton Results to use S3" + ./tests/load-tests/ci-scripts/setup-tekton-results-s3.sh + echo "Restarting Tekton Results API" + oc rollout restart deployment/tekton-results-api -n tekton-results + oc rollout status deployment/tekton-results-api -n tekton-results -w + echo "Restarting Tekton Results Watcher" + oc rollout restart deployment/tekton-results-watcher -n tekton-results + oc rollout status deployment/tekton-results-watcher -n tekton-results -w +else + echo "TEKTON_RESULTS_S3_BUCKET_NAME env variable is not set or empty - skipping setting up Tekton Results to use S3" +fi + popd diff --git a/tests/load-tests/ci-scripts/setup-tekton-results-s3.sh b/tests/load-tests/ci-scripts/setup-tekton-results-s3.sh new file mode 100755 index 0000000000..60a38a8269 --- /dev/null +++ b/tests/load-tests/ci-scripts/setup-tekton-results-s3.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +if [ -z "${TEKTON_RESULTS_S3_BUCKET_NAME}" ]; then + echo "TEKTON_RESULTS_S3_BUCKET_NAME env variable is not set or empty - skipping setting up Tekton Results to use S3" + exit 0 +fi +echo "Setting up Tekton Results to use S3" + +export AWS_REGION=$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/aws_region) +export AWS_PROFILE=rhtap-perfscale +export AWS_DEFAULT_OUTPUT=json + +NS=tekton-results + +cli=oc +clin="$cli -n $NS" + +echo "Creating S3 bucket $TEKTON_RESULTS_S3_BUCKET_NAME" >&2 +if [ -z "$(aws s3api list-buckets | jq -rc '.Buckets[] | select(.Name =="'"$TEKTON_RESULTS_S3_BUCKET_NAME"'")')" ]; then + aws s3api create-bucket --bucket
"$TEKTON_RESULTS_S3_BUCKET_NAME" --region="$AWS_REGION" --create-bucket-configuration LocationConstraint="$AWS_REGION" | jq -rc +else + echo "S3 bucket $TEKTON_RESULTS_S3_BUCKET_NAME already exists, skipping creation" +fi + +echo "Creating namespace $NS" >&2 +$cli create namespace "$NS" --dry-run=client -o yaml | kubectl apply -f - + +echo "Creating S3 secret" >&2 +$clin create secret generic tekton-results-s3 \ + --from-literal=aws_access_key_id="$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/aws_access_key_id)" \ + --from-literal=aws_secret_access_key="$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/aws_secret_access_key)" \ + --from-literal=aws_region="$AWS_REGION" \ + --from-literal=bucket="$TEKTON_RESULTS_S3_BUCKET_NAME" \ + --from-literal=endpoint="https://s3.$AWS_REGION.amazonaws.com" --dry-run=client -o yaml | $clin apply -f - diff --git a/tests/load-tests/ci-scripts/utility_scripts/runs-to-csv.sh b/tests/load-tests/ci-scripts/utility_scripts/runs-to-csv.sh index d6ae6bee74..70fc8fcc5c 100755 --- a/tests/load-tests/ci-scripts/utility_scripts/runs-to-csv.sh +++ b/tests/load-tests/ci-scripts/utility_scripts/runs-to-csv.sh @@ -36,12 +36,6 @@ createApplicationsFailureRate,\ createApplicationsTimeAvg,\ createApplicationsTimeMax,\ \ -createCDQsSuccesses,\ -createCDQsFailures,\ -createCDQsFailureRate,\ -createCDQsTimeAvg,\ -createCDQsTimeMax,\ -\ createComponentsSuccesses,\ createComponentsFailures,\ createComponentsFailureRate,\ @@ -99,8 +93,8 @@ echo "$headers" find "${1:-.}" -name load-tests.json -print0 | while IFS= read -r -d '' filename; do grep --quiet "XXXXX" "$(unknown)" && echo "WARNING placeholders found in $(unknown), removing" - sed -Ee 's/: ([0-9]+\.[0-9]*[X]+[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9]*X+[0-9e\+-]+)/: "\1"/g' "$(unknown)" \ - | jq --raw-output '[ + sed -Ee 's/: ([0-9]+\.[0-9]*[X]+[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9]*X+[0-9e\+-]+)/: "\1"/g' "$(unknown)" | + jq --raw-output '[
.metadata.env.BUILD_ID, .timestamp, .endTimestamp, @@ -132,12 +126,6 @@ find "${1:-.}" -name load-tests.json -print0 | while IFS= read -r -d '' filename .createApplicationsTimeAvg, .createApplicationsTimeMax, - .createCDQsSuccesses, - .createCDQsFailures, - .createCDQsFailureRate, - .createCDQsTimeAvg, - .createCDQsTimeMax, - .createComponentsSuccesses, .createComponentsFailures, .createComponentsFailureRate, @@ -162,7 +150,7 @@ find "${1:-.}" -name load-tests.json -print0 | while IFS= read -r -d '' filename .deploymentSucceededTimeAvg, .deploymentSucceededTimeMax, - .createApplicationsTimeAvg + .createCDQsTimeAvg + .createComponentsTimeAvg + .integrationTestsRunPipelineSucceededTimeAvg + .runPipelineSucceededTimeAvg + .deploymentSucceededTimeAvg, + .createApplicationsTimeAvg + .createComponentsTimeAvg + .integrationTestsRunPipelineSucceededTimeAvg + .runPipelineSucceededTimeAvg + .deploymentSucceededTimeAvg, .workloadKPI, .measurements.cluster_cpu_usage_seconds_total_rate.mean, diff --git a/tests/load-tests/cluster_read_config.yaml b/tests/load-tests/cluster_read_config.yaml index de8d03677b..b8f6c8481e 100644 --- a/tests/load-tests/cluster_read_config.yaml +++ b/tests/load-tests/cluster_read_config.yaml @@ -200,3 +200,13 @@ {{ monitor_pod('openshift-pipelines', 'tekton-pipelines-controller', 15) }} {{ monitor_pod('tekton-results', 'tekton-results-watcher', 1, '-.*') }} {{ monitor_pod_container('tekton-results', 'tekton-results-watcher', 'watcher', 1, '-.*') }} +{{ monitor_pod('tekton-results', 'tekton-results-api', 1, '-.*') }} +{{ monitor_pod_container('tekton-results', 'tekton-results-api', 'api', 1, '-.*') }} + +- name: measurements.tekton-results-watcher.watcher_workqueue_depth + monitoring_query: sum(watcher_workqueue_depth{job="tekton-results-watcher"}) + monitoring_step: 1 + +- name: measurements.tekton-results-watcher.watcher_reconcile_latency_bucket + monitoring_query: histogram_quantile(0.99, 
sum(rate(watcher_reconcile_latency_bucket{job="tekton-results-watcher"}[30m])) by (le) ) / 1000 + monitoring_step: 1 \ No newline at end of file diff --git a/tests/load-tests/run-max-concurrency.sh b/tests/load-tests/run-max-concurrency.sh index 4a58375d66..6846c603ba 100755 --- a/tests/load-tests/run-max-concurrency.sh +++ b/tests/load-tests/run-max-concurrency.sh @@ -16,31 +16,38 @@ load_test() { iteration=$(printf "%04d" "${2:-1}") index=$(printf "%04d" "$threads") iteration_index="${iteration}-${index}" + echo + echo "=== RHTAP load test ===" + echo "Threads: $threads" + echo "Iteration: $iteration" + echo "Index: $index" + echo "Iteration index: $iteration_index" + echo ## Enable CPU profiling in Tekton if [ "${TEKTON_PERF_ENABLE_CPU_PROFILING:-}" == "true" ]; then - echo "Starting CPU profiling with pprof" - for p in $(oc get pods -n openshift-pipelines -l app=tekton-pipelines-controller -o name); do - pod="${p##*/}" - file="tekton-pipelines-controller.$pod.cpu-profile.$iteration_index" - oc exec -n openshift-pipelines "$p" -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & - echo $! >"$output_dir/$file.pid" - done + echo "Starting CPU profiling of Tekton results watcher with pprof" for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do pod="${p##*/}" file=tekton-results-watcher.$pod.cpu-profile.$iteration_index oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & echo $! 
>"$output_dir/$file.pid" + file=tekton-results-watcher.$pod.cpu-profile.mutex.$iteration_index + oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/mutex?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & + echo $! >"$output_dir/$file.pid" done - fi - ## Enable memory profiling in Tekton - if [ "${TEKTON_PERF_ENABLE_MEMORY_PROFILING:-}" == "true" ]; then - echo "Starting memory profiling of Tekton controller with pprof" - for p in $(oc get pods -n openshift-pipelines -l app=tekton-pipelines-controller -o name); do + echo "Starting CPU profiling of Tekton results API with pprof" + for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-api -o name); do pod="${p##*/}" - file="tekton-pipelines-controller.$pod.memory-profile.$iteration_index" - oc exec -n openshift-pipelines "$p" -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/heap?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & + file=tekton-results-api.$pod.cpu-profile.$iteration_index + oc exec -n tekton-results "$p" -c api -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:6060/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & + echo $! >"$output_dir/$file.pid" + file=tekton-results-api.$pod.cpu-profile.mutex.$iteration_index + oc exec -n tekton-results "$p" -c api -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:6060/debug/pprof/mutex?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & echo $! 
>"$output_dir/$file.pid" done + fi + ## Enable memory profiling in Tekton + if [ "${TEKTON_PERF_ENABLE_MEMORY_PROFILING:-}" == "true" ]; then echo "Starting memory profiling of Tekton results watcher with pprof" for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do pod="${p##*/}" @@ -48,11 +55,19 @@ load_test() { oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/heap?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & echo $! >"$output_dir/$file.pid" done + echo "Starting memory profiling of Tekton results API with pprof" + for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-api -o name); do + pod="${p##*/}" + file=tekton-results-api.$pod.memory-profile.$iteration_index + oc exec -n tekton-results "$p" -c api -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:6060/debug/pprof/heap?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & + echo $!
>"$output_dir/$file.pid" + done fi rm -rvf "$output_dir/load-test.json" rm -rvf "$output_dir/load-test.log" go run loadtest.go \ --component-repo "${COMPONENT_REPO:-https://github.com/nodeshift-starters/devfile-sample.git}" \ + --component-dockerfile-path "${COMPONENT_DOCKERFILE_PATH:-src/main/docker/Dockerfile.jvm.staged}" \ --username "$USER_PREFIX-$index" \ --users 1 \ -w="${WAIT_PIPELINES:-true}" \ @@ -67,18 +82,10 @@ load_test() { --pipeline-skip-initial-checks="${PIPELINE_SKIP_INITIAL_CHECKS:-true}" if [ "${TEKTON_PERF_ENABLE_CPU_PROFILING:-}" == "true" ] || [ "${TEKTON_PERF_ENABLE_MEMORY_PROFILING:-}" == "true" ]; then echo "Waiting for the Tekton profiling to finish up to ${TEKTON_PERF_PROFILE_CPU_PERIOD}s" - for pid_file in $(find "$output_dir" -name 'tekton*.pid'); do + for pid_file in $(find $output_dir -name 'tekton*.pid'); do wait "$(cat "$pid_file")" rm -rvf "$pid_file" done - echo "Getting Tekton controller goroutine dump" - for p in $(oc get pods -n openshift-pipelines -l app=tekton-pipelines-controller -o name); do - pod="${p##*/}" - for i in 0 1 2; do - file="tekton-pipelines-controller.$pod.goroutine-dump-$i.$iteration_index" - oc exec -n tekton-results "$p" -- bash -c "curl -SsL localhost:8008/debug/pprof/goroutine?debug=$i | base64" | base64 -d >"$output_dir/$file.pprof" - done - done echo "Getting Tekton results watcher goroutine dump" for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do pod="${p##*/}" @@ -87,6 +94,14 @@ load_test() { oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL localhost:8008/debug/pprof/goroutine?debug=$i | base64" | base64 -d >"$output_dir/$file.pprof" done done + echo "Getting Tekton results api goroutine dump" + for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-api -o name); do + pod="${p##*/}" + for i in 0 1 2; do + file="tekton-results-api.$pod.goroutine-dump-$i.$iteration_index" + oc exec -n tekton-results "$p" -c api 
-- bash -c "curl -SsL localhost:6060/debug/pprof/goroutine?debug=$i | base64" | base64 -d >"$output_dir/$file.pprof" + done + done fi } @@ -141,7 +156,7 @@ max_concurrency() { maxThreads=${MAX_THREADS:-10} threshold=${THRESHOLD:-300} echo '{"startTimestamp":"'"$(date +%FT%T%:z)"'", "maxThreads": '"$maxThreads"', "maxConcurrencySteps": "'"${maxConcurrencySteps[*]}"'", "threshold": '"$threshold"', "maxConcurrencyReached": 0, "computedConcurrency": 0, "workloadKPI": 0, "endTimestamp": "", "errorsTotal": -1}' | jq >"$output" - iteration=0 + iteration=${ITERATION_OFFSET:-0} for t in "${maxConcurrencySteps[@]}"; do iteration="$((iteration + 1))" if (("$t" > "$maxThreads")); then @@ -154,6 +169,8 @@ max_concurrency() { jq ".metadata.\"max-concurrency\".iteration = \"$(printf "%04d" "$iteration")\"" "$output_dir/load-tests.json" >"$output_dir/$$.json" && mv -f "$output_dir/$$.json" "$output_dir/load-tests.json" cp -vf "$output_dir/load-tests.json" "$output_dir/load-tests.max-concurrency.$iteration_index.json" cp -vf "$output_dir/load-tests.log" "$output_dir/load-tests.max-concurrency.$iteration_index.log" + oc logs -c watcher -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher --tail=-1 --prefix=true >"$output_dir/tekton-results-watcher.$iteration_index.logs" + oc logs -c api -n tekton-results -l app.kubernetes.io/name=tekton-results-api --tail=-1 --prefix=true >"$output_dir/tekton-results-api.$iteration_index.logs" workloadKPI=$(jq '.workloadKPI' "$output_dir/load-tests.json") if awk "BEGIN { exit !($workloadKPI > $threshold)}"; then echo "The average time a workload took to succeed (${workloadKPI}s) has exceeded a threshold of ${threshold}s with $t threads." 
diff --git a/tests/load-tests/run.sh b/tests/load-tests/run.sh index 90122433aa..b16a5947a8 100755 --- a/tests/load-tests/run.sh +++ b/tests/load-tests/run.sh @@ -30,29 +30,23 @@ elif [ "${RANDOM_PREFIX_FLAG}" == "" ] && [ ${#USER_PREFIX} -gt 15 ]; then else ## Enable CPU profiling in Tekton if [ "${TEKTON_PERF_ENABLE_CPU_PROFILING:-}" == "true" ]; then - echo "Starting CPU profiling with pprof" - for p in $(oc get pods -n openshift-pipelines -l app=tekton-pipelines-controller -o name); do + echo "Starting CPU profiling of Tekton results watcher with pprof" + for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do pod="${p##*/}" - file="tekton-pipelines-controller.$pod.cpu-profile" - oc exec -n openshift-pipelines "$p" -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & + file=tekton-results-watcher.$pod.cpu-profile + oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & echo $! >"$output_dir/$file.pid" done - p=$(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name) - pod="${p##*/}" - file=tekton-results-watcher.$pod.cpu-profile - oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & - echo $! 
>"$output_dir/$file.pid" - fi - ## Enable memory profiling in Tekton - if [ "${TEKTON_PERF_ENABLE_MEMORY_PROFILING:-}" == "true" ]; then - file=tekton-pipelines-controller.memory-profile - echo "Starting memory profiling of Tekton controller with pprof" - for p in $(oc get pods -n openshift-pipelines -l app=tekton-pipelines-controller -o name); do + echo "Starting CPU profiling of Tekton results API with pprof" + for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-api -o name); do pod="${p##*/}" - file="tekton-pipelines-controller.$pod.memory-profile" - oc exec -n openshift-pipelines "$p" -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/heap?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & + file=tekton-results-api.$pod.cpu-profile + oc exec -n tekton-results "$p" -c api -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:6060/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & echo $! >"$output_dir/$file.pid" done + fi + ## Enable memory profiling in Tekton + if [ "${TEKTON_PERF_ENABLE_MEMORY_PROFILING:-}" == "true" ]; then echo "Starting memory profiling of Tekton results watcher with pprof" for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do pod="${p##*/}" @@ -60,6 +54,13 @@ else oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/heap?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & echo $! 
>"$output_dir/$file.pid" done + echo "Starting memory profiling of Tekton results API with pprof" + for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-api -o name); do + pod="${p##*/}" + file=tekton-results-api.$pod.memory-profile + oc exec -n tekton-results "$p" -c api -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:6060/debug/pprof/heap?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & + echo $! >"$output_dir/$file.pid" + done fi ## Switch KubeScheduler Debugging on if [ -n "$KUBE_SCHEDULER_LOG_LEVEL" ]; then @@ -82,6 +83,7 @@ else ## To enable progress bar , add `--enable-progress-bars` in [OPTIONS] go run loadtest.go \ --component-repo "${COMPONENT_REPO:-https://github.com/devfile-samples/devfile-sample-code-with-quarkus}" \ + --component-dockerfile-path "${COMPONENT_DOCKERFILE_PATH:-src/main/docker/Dockerfile.jvm.staged}" \ --username "$USER_PREFIX" \ --users "${USERS_PER_THREAD:-50}" \ --test-scenario-git-url "${TEST_SCENARIO_GIT_URL:-https://github.com/konflux-ci/integration-examples.git}" \ @@ -107,14 +109,6 @@ else wait "$(cat "$pid_file")" rm -rvf "$pid_file" done - echo "Getting Tekton controller goroutine dump" - for p in $(oc get pods -n openshift-pipelines -l app=tekton-pipelines-controller -o name); do - pod="${p##*/}" - for i in 0 1 2; do - file="tekton-pipelines-controller.$pod.goroutine-dump-$i" - oc exec -n tekton-results "$p" -- bash -c "curl -SsL localhost:8008/debug/pprof/goroutine?debug=$i | base64" | base64 -d >"$output_dir/$file.pprof" - done - done echo "Getting Tekton results watcher goroutine dump" for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do pod="${p##*/}" @@ -123,6 +117,14 @@ else oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL localhost:8008/debug/pprof/goroutine?debug=$i | base64" | base64 -d >"$output_dir/$file.pprof" done done + echo "Getting Tekton
results api goroutine dump" + for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-api -o name); do + pod="${p##*/}" + for i in 0 1 2; do + file="tekton-results-api.$pod.goroutine-dump-$i" + oc exec -n tekton-results "$p" -c api -- bash -c "curl -SsL localhost:6060/debug/pprof/goroutine?debug=$i | base64" | base64 -d >"$output_dir/$file.pprof" + done + done fi if [ -n "$KUBE_SCHEDULER_LOG_LEVEL" ]; then echo "Killing kube collector log collector"