Skip to content

Commit

Permalink
Merge pull request #85 from scality/feature/COSI-19-add-s3-iam-metrics
Browse files Browse the repository at this point in the history
COSI-19, COSI-21: Enhance Scality COSI Driver with S3 and IAM Metrics, Logging, and IAM Client Improvements
  • Loading branch information
anurag4DSB authored Jan 8, 2025
2 parents 3459e28 + d1ade33 commit ae9ed37
Show file tree
Hide file tree
Showing 13 changed files with 487 additions and 58 deletions.
47 changes: 47 additions & 0 deletions .github/scripts/e2e_tests_metrics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,29 @@ log_and_run() {
"$@" 2>&1 | tee -a "$LOG_FILE"
}

# validate_metric ACTION COUNT EXPECTED DURATION
#
# Checks one scraped metric against the expectations of the e2e run.
#   ACTION   - label used in log/error messages (e.g. "CreateBucket").
#   COUNT    - request count extracted from the metrics output; may be empty
#              when the series is absent from the scrape.
#   EXPECTED - request count the test run is expected to have produced.
#   DURATION - summed request duration extracted from the metrics output;
#              may be empty when the series is absent from the scrape.
#
# Every message is echoed and appended to $LOG_FILE. The function exits the
# script with status 1 on the first failed check and returns 0 otherwise.
validate_metric() {
    local action="$1"
    local count="$2"
    local expected="$3"
    local duration="$4"
    local integer_re='^[0-9]+$'
    local number_re='^[-+]?[0-9]+([.][0-9]+)?([eE][-+]?[0-9]+)?$'

    # Print details for the metric (raw values, before any normalization)
    echo "$action Count: $count, Expected: $expected" | tee -a "$LOG_FILE"
    echo "$action Duration: $duration" | tee -a "$LOG_FILE"

    # An absent series means no request was recorded: compare it as 0.
    if [[ -z "$count" ]]; then
        count=0
    fi

    # Reject multi-line or non-numeric counts explicitly. Handing them to
    # `[[ -ne ]]` raises an arithmetic error whose non-zero status would be
    # read as "no mismatch" and let the check pass silently.
    if ! [[ "$count" =~ $integer_re ]]; then
        echo "Error: $action count is not a single non-negative integer. Found: $count" | tee -a "$LOG_FILE"
        exit 1
    fi

    # Validate counts
    if [[ "$count" -ne "$expected" ]]; then
        echo "Error: $action count mismatch. Found: $count, Expected: $expected" | tee -a "$LOG_FILE"
        exit 1
    fi

    # A missing duration is only acceptable when no request was expected.
    if [[ -z "$duration" ]]; then
        if [[ "$expected" -gt 0 ]]; then
            echo "Error: $action duration metric is missing." | tee -a "$LOG_FILE"
            exit 1
        fi
        return 0
    fi

    # Reject multi-line or non-numeric durations instead of letting the
    # comparison below fail open.
    if ! [[ "$duration" =~ $number_re ]]; then
        echo "Error: $action duration is not a single numeric value. Duration: $duration" | tee -a "$LOG_FILE"
        exit 1
    fi

    # Validate durations are greater than 0. awk is used for the floating
    # point comparison because it also understands exponent notation
    # (e.g. 9.5e-05), which small sums may be rendered in.
    if ! awk -v d="$duration" 'BEGIN { exit !(d + 0 > 0) }'; then
        echo "Error: $action duration is not greater than 0. Duration: $duration" | tee -a "$LOG_FILE"
        exit 1
    fi
}

# Fetch services and validate the target service exists
log_and_run kubectl get svc --all-namespaces

Expand Down Expand Up @@ -97,4 +120,28 @@ echo "$METRICS_OUTPUT" | while read -r line; do
fi
done

log_and_run echo "Verifying S3 and IAM metrics..."

# successful_metric_value METRIC ACTION
#
# Prints the sample value (last whitespace-separated field) of every line in
# $S3_IAM_METRICS_OUTPUT that matches metric name METRIC, label
# action="ACTION" and label status="success". Prints nothing when no series
# matches.
successful_metric_value() {
    local metric="$1"
    local action="$2"

    echo "$S3_IAM_METRICS_OUTPUT" \
        | grep "$metric" \
        | grep "action=\"$action\"" \
        | grep 'status="success"' \
        | awk '{print $NF}'
}

# Only verify metrics if EXPECTED_CREATE_BUCKET is more than 0.
if [[ "$EXPECTED_CREATE_BUCKET" -gt 0 ]]; then

    # Keep only the driver's own S3/IAM series from the saved scrape.
    S3_IAM_METRICS_OUTPUT=$(grep 'scality_cosi_driver' /tmp/metrics_output.log)
    echo "Metrics fetched successfully:" | tee -a "$LOG_FILE"
    echo "$S3_IAM_METRICS_OUTPUT" | tee -a "$LOG_FILE"

    # Request counters of successful operations.
    CREATE_BUCKET_COUNT="$(successful_metric_value 'scality_cosi_driver_s3_requests_total' 'CreateBucket')"
    DELETE_BUCKET_COUNT="$(successful_metric_value 'scality_cosi_driver_s3_requests_total' 'DeleteBucket')"
    CREATE_USER_COUNT="$(successful_metric_value 'scality_cosi_driver_iam_requests_total' 'CreateUser')"
    DELETE_USER_COUNT="$(successful_metric_value 'scality_cosi_driver_iam_requests_total' 'DeleteUser')"

    # Summed durations. These are restricted to status="success" as well, so
    # that they describe the same series as the counters above and a
    # status="error" series for the same action cannot add a second value.
    CREATE_BUCKET_DURATION="$(successful_metric_value 'scality_cosi_driver_s3_request_duration_seconds_sum' 'CreateBucket')"
    DELETE_BUCKET_DURATION="$(successful_metric_value 'scality_cosi_driver_s3_request_duration_seconds_sum' 'DeleteBucket')"
    CREATE_USER_DURATION="$(successful_metric_value 'scality_cosi_driver_iam_request_duration_seconds_sum' 'CreateUser')"
    DELETE_USER_DURATION="$(successful_metric_value 'scality_cosi_driver_iam_request_duration_seconds_sum' 'DeleteUser')"

    # User creation/deletion is checked against the expected grant/revoke
    # access counts.
    validate_metric "CreateBucket" "$CREATE_BUCKET_COUNT" "$EXPECTED_CREATE_BUCKET" "$CREATE_BUCKET_DURATION"
    validate_metric "DeleteBucket" "$DELETE_BUCKET_COUNT" "$EXPECTED_DELETE_BUCKET" "$DELETE_BUCKET_DURATION"
    validate_metric "CreateUser" "$CREATE_USER_COUNT" "$EXPECTED_GRANT_ACCESS" "$CREATE_USER_DURATION"
    validate_metric "DeleteUser" "$DELETE_USER_COUNT" "$EXPECTED_REVOKE_ACCESS" "$DELETE_USER_DURATION"
fi

echo "Metrics validation successful!" | tee -a "$LOG_FILE"
1 change: 1 addition & 0 deletions cmd/scality-cosi-driver/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ func init() {
func run(ctx context.Context) error {
registry := prometheus.NewRegistry()
driverName := *driverPrefix + "." + provisionerName
metrics.InitializeMetrics(defaultMetricsPrefix, registry)

metricsServer, err := metrics.StartMetricsServerWithRegistry(*driverMetricsAddress, registry, *driverMetricsPath)
if err != nil {
Expand Down
125 changes: 125 additions & 0 deletions docs/metrics-overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,131 @@ The COSI driver exposes default gRPC server metrics to monitor RPC activity.
grpc_server_started_total{grpc_method="DriverGetInfo",grpc_service="cosi.v1alpha1.Identity",grpc_type="unary"} 2
```

---
## IAM Operation Metrics

The COSI driver collects metrics for IAM operations performed via the AWS IAM API. These metrics help track the number and duration of IAM-related operations, enabling better monitoring and observability of IAM activity.

### Status Labels

| Label | Description |
|-----------|-------------------------------------------------|
| `success` | Indicates the operation completed successfully. |
| `error` | Indicates the operation failed. |

### Key IAM Metrics

| Metric Name | Description | Labels | Example Values |
|---------------------------------------------------|------------------------------------------------------------|-------------------|----------------------------------|
| `scality_cosi_driver_iam_request_duration_seconds`| Histogram of IAM request durations in seconds. | `action`, `status`| `CreateUser`, `success` |
| `scality_cosi_driver_iam_requests_total` | Total number of IAM requests categorized by action and status. | `action`, `status`| `CreateAccessKey`, `success` |

### IAM Operations

| IAM Operation | Description |
|-------------------------|---------------------------------------------------------------------|
| `CreateUser` | Creates an IAM user with the specified username. |
| `CreateAccessKey` | Generates access keys for a specific IAM user. |
| `PutUserPolicy` | Attaches an inline S3 wildcard policy to a user for bucket access. |
| `GetUser` | Retrieves details about an IAM user. |
| `ListAccessKeys` | Lists all access keys associated with an IAM user. |
| `DeleteAccessKey` | Deletes a specific access key associated with an IAM user. |
| `DeleteUserPolicy` | Deletes an inline policy associated with an IAM user. |
| `DeleteUser` | Deletes an IAM user. |

### Example IAM Metrics Output

Duration of IAM requests in seconds

```sh
scality_cosi_driver_iam_request_duration_seconds_bucket{action="CreateUser",status="success",le="0.01"} 3
scality_cosi_driver_iam_request_duration_seconds_bucket{action="CreateUser",status="success",le="0.025"} 4
scality_cosi_driver_iam_request_duration_seconds_sum{action="CreateUser",status="success"} 0.014
scality_cosi_driver_iam_request_duration_seconds_count{action="CreateUser",status="success"} 4
```

Total number of IAM requests

```sh
scality_cosi_driver_iam_requests_total{action="CreateUser",status="success"} 4
scality_cosi_driver_iam_requests_total{action="DeleteAccessKey",status="error"} 1
```

### Example IAM Workflow

#### Creating Bucket Access

1. Create an IAM user (`CreateUser`).
2. Attach an inline policy for bucket access (`PutUserPolicy`).
3. Generate access keys for the IAM user (`CreateAccessKey`).

#### Revoking Bucket Access

1. Verify the IAM user exists (`GetUser`).
2. Delete inline policies (`DeleteUserPolicy`).
3. Delete all associated access keys (`DeleteAccessKey`).
4. Delete the IAM user (`DeleteUser`).

---

## S3 Operation Metrics

The COSI driver collects metrics for S3 bucket operations performed via the AWS S3 API. These metrics help monitor bucket-related operations and their durations.

### Status Labels

| Label | Description |
|-----------|-------------------------------------------------|
| `success` | Indicates the operation completed successfully. |
| `error` | Indicates the operation failed. |

### Key S3 Metrics

| Metric Name | Description | Labels | Example Values |
|---------------------------------------------------|------------------------------------------------------------|-------------------|----------------------------------|
| `scality_cosi_driver_s3_request_duration_seconds` | Histogram of S3 request durations in seconds. | `action`, `status`| `CreateBucket`, `success` |
| `scality_cosi_driver_s3_requests_total` | Total number of S3 requests categorized by action and status. | `action`, `status`| `DeleteBucket`, `success` |

### S3 Operations

| S3 Operation | Description |
|---------------------|--------------------------------------------------------------------------|
| `CreateBucket` | Creates a new S3 bucket in the specified region. |
| `DeleteBucket` | Deletes an existing S3 bucket (only empty bucket deletion is supported). |

### Example S3 Metrics Output

Duration of S3 requests in seconds

```sh
scality_cosi_driver_s3_request_duration_seconds_bucket{action="CreateBucket",status="success",le="0.01"} 1
scality_cosi_driver_s3_request_duration_seconds_bucket{action="CreateBucket",status="success",le="0.05"} 2
scality_cosi_driver_s3_request_duration_seconds_sum{action="CreateBucket",status="success"} 0.04
scality_cosi_driver_s3_request_duration_seconds_count{action="CreateBucket",status="success"} 2
```

Total number of S3 requests

```sh
scality_cosi_driver_s3_requests_total{action="CreateBucket",status="success"} 2
scality_cosi_driver_s3_requests_total{action="DeleteBucket",status="success"} 1
```

### Example S3 Workflow

#### Creating a Bucket

1. Specify the bucket name and region.
2. Use the `CreateBucket` operation to create the bucket.
3. Configure bucket properties (e.g., policies, versioning) if needed.

#### Deleting a Bucket

1. Verify the bucket exists.
2. Use the `DeleteBucket` operation to delete the bucket. Only empty bucket deletion is supported.

## Additional Resource

- [gRPC-Go Prometheus Metrics](https://github.com/grpc-ecosystem/go-grpc-middleware)
Expand Down
17 changes: 11 additions & 6 deletions pkg/clients/iam/iam_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ import (
"github.com/aws/aws-sdk-go-v2/service/iam"
"github.com/aws/aws-sdk-go-v2/service/iam/types"
"github.com/aws/smithy-go/logging"
"github.com/aws/smithy-go/middleware"
c "github.com/scality/cosi-driver/pkg/constants"
"github.com/scality/cosi-driver/pkg/metrics"
"github.com/scality/cosi-driver/pkg/util"
"k8s.io/klog/v2"
)
Expand All @@ -36,7 +38,7 @@ type IAMClient struct {

var LoadAWSConfig = config.LoadDefaultConfig

var InitIAMClient = func(params util.StorageClientParameters) (*IAMClient, error) {
var InitIAMClient = func(ctx context.Context, params util.StorageClientParameters) (*IAMClient, error) {
var logger logging.Logger
if params.Debug {
logger = logging.NewStandardLogger(os.Stdout)
Expand All @@ -53,13 +55,16 @@ var InitIAMClient = func(params util.StorageClientParameters) (*IAMClient, error
httpClient.Transport = util.ConfigureTLSTransport(params.TLSCert)
}

ctx := context.Background()

awsCfg, err := LoadAWSConfig(ctx,
config.WithRegion(params.Region),
config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(params.AccessKeyID, params.SecretAccessKey, "")),
config.WithHTTPClient(httpClient),
config.WithLogger(logger),
config.WithAPIOptions([]func(*middleware.Stack) error{
func(stack *middleware.Stack) error {
return metrics.AttachPrometheusMiddleware(stack, metrics.IAMRequestDuration, metrics.IAMRequestsTotal)
},
}),
)
if err != nil {
return nil, err
Expand All @@ -84,8 +89,8 @@ func (client *IAMClient) CreateUser(ctx context.Context, userName string) error
return err
}

// AttachS3WildcardInlinePolicy attaches an inline policy to an IAM user for a specific bucket.
func (client *IAMClient) AttachS3WildcardInlinePolicy(ctx context.Context, userName, bucketName string) error {
// CreateS3WildcardInlinePolicy creates an inline policy for an IAM user, scoped to a specific bucket.
func (client *IAMClient) CreateS3WildcardInlinePolicy(ctx context.Context, userName, bucketName string) error {
policyDocument := fmt.Sprintf(`{
"Version": "2012-10-17",
"Statement": [
Expand Down Expand Up @@ -128,7 +133,7 @@ func (client *IAMClient) CreateBucketAccess(ctx context.Context, userName, bucke
}
klog.V(c.LvlInfo).InfoS("Successfully created IAM user", "userName", userName)

err = client.AttachS3WildcardInlinePolicy(ctx, userName, bucketName)
err = client.CreateS3WildcardInlinePolicy(ctx, userName, bucketName)
if err != nil {
return nil, err
}
Expand Down
Loading

0 comments on commit ae9ed37

Please sign in to comment.