From 57a2384ac78b9417a12638a72976f3a981637ce7 Mon Sep 17 00:00:00 2001 From: jt-dd <112463504+jt-dd@users.noreply.github.com> Date: Fri, 13 Sep 2024 14:48:21 +0200 Subject: [PATCH] add env variable for ingestor/grpc image (#264) * add env variable for ingestor/grpc image * change bucket_name to bucket_url * warning message for retrying connection * typo --- configs/etc/kubehound-reference.yaml | 2 +- configs/etc/kubehound.yaml | 10 ++++---- .../k8s/khaas/conf/ingestor/kubehound.yaml | 6 ++--- deployments/k8s/khaas/values.yaml | 2 +- docs/user-guide/khaas-101.md | 2 +- pkg/config/collector.go | 4 ++-- pkg/config/config.go | 23 +++++++++++++++---- pkg/config/config_test.go | 8 +++---- pkg/config/ingestor.go | 4 ++-- pkg/config/janusgraph.go | 3 +++ pkg/config/mongodb.go | 3 +++ pkg/ingestor/puller/blob/blob.go | 4 ++-- pkg/ingestor/puller/blob/blob_test.go | 4 ++-- pkg/kubehound/storage/retrier.go | 2 ++ test/system/kubehound_dump.yaml | 4 ++-- test/system/setup_test.go | 2 +- 16 files changed, 52 insertions(+), 31 deletions(-) diff --git a/configs/etc/kubehound-reference.yaml b/configs/etc/kubehound-reference.yaml index 92a5a1ec3..f1328644f 100644 --- a/configs/etc/kubehound-reference.yaml +++ b/configs/etc/kubehound-reference.yaml @@ -125,7 +125,7 @@ builder: # ingestor: # blob: # # (i.e.: s3://) -# bucket: "" +# bucket_url: "" # # (i.e.: us-east-1) # region: "" # temp_dir: "/tmp/kubehound" diff --git a/configs/etc/kubehound.yaml b/configs/etc/kubehound.yaml index b36c66c79..7271f7813 100644 --- a/configs/etc/kubehound.yaml +++ b/configs/etc/kubehound.yaml @@ -50,10 +50,10 @@ builder: # Batch size for edge inserts batch_size: 500 - + # Cluster impact batch size for edge inserts batch_size_cluster_impact: 10 - + # Enable for large clusters to prevent number of edges growing exponentially large_cluster_optimizations: true @@ -61,13 +61,13 @@ builder: ingestor: blob: # (i.e.: s3://) - bucket: "" + bucket_url: "" # (i.e.: us-east-1) - region: "" + region: "" temp_dir: "/tmp/kubehound" archive_name: "archive.tar.gz" max_archive_size: 2147483648 # 2GB # GRPC endpoint for the ingestor - api: + api: endpoint: "127.0.0.1:9000" insecure: true diff --git a/deployments/k8s/khaas/conf/ingestor/kubehound.yaml b/deployments/k8s/khaas/conf/ingestor/kubehound.yaml index 6074ad0ca..339366442 100644 --- a/deployments/k8s/khaas/conf/ingestor/kubehound.yaml +++ b/deployments/k8s/khaas/conf/ingestor/kubehound.yaml @@ -18,7 +18,7 @@ collector: # General storage configuration storage: - # Whether or not to wipe all data on startup + # Whether or not to wipe all data on startup wipe: false # Number of connection retries before declaring an error @@ -61,7 +61,7 @@ builder: # Batch size for edge inserts batch_size: 1000 - + # Cluster impact batch size for edge inserts batch_size_cluster_impact: 10 @@ -70,7 +70,7 @@ builder: ingestor: blob: - bucket: "{{ $.Values.services.ingestor.bucket }}" + bucket_url: "{{ $.Values.services.ingestor.bucket_url }}" region: "{{ $.Values.services.ingestor.region }}" temp_dir: "/tmp/kubehound" archive_name: "archive.tar.gz" diff --git a/deployments/k8s/khaas/values.yaml b/deployments/k8s/khaas/values.yaml index c0897b8bf..0e05af021 100644 --- a/deployments/k8s/khaas/values.yaml +++ b/deployments/k8s/khaas/values.yaml @@ -3,7 +3,7 @@ services: ingestor: image: ghcr.io/datadog/kubehound-binary version: latest - bucket: s3:// + bucket_url: s3:// region: "us-east-1" resources: requests: diff --git a/docs/user-guide/khaas-101.md b/docs/user-guide/khaas-101.md index 570788c72..ade1ecb21 100644 --- a/docs/user-guide/khaas-101.md +++ b/docs/user-guide/khaas-101.md @@ -93,7 +93,7 @@ If you don't want to specify the bucket every time, you can set it up in your lo ingestor: blob: # (i.e.: s3://) - bucket: "" + bucket_url: "" # (i.e.: us-east-1) region: "" ``` diff --git a/pkg/config/collector.go b/pkg/config/collector.go index 1f0a1bb73..14855c465 100644 --- a/pkg/config/collector.go +++ b/pkg/config/collector.go @@ -50,6 +50,6 @@ type FileArchiveConfig struct { } type BlobConfig struct { - Bucket string `mapstructure:"bucket"` // Bucket to use to push k8s resources (e.g.: s3://) - Region string `mapstructure:"region"` // Region to use for the bucket (only for s3) + BucketUrl string `mapstructure:"bucket_url"` // Bucket to use to push k8s resources (e.g.: s3://) + Region string `mapstructure:"region"` // Region to use for the bucket (only for s3) } diff --git a/pkg/config/config.go b/pkg/config/config.go index 2cad68b47..1623193e6 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -108,12 +108,12 @@ func SetDefaultValues(v *viper.Viper) { v.SetDefault(TelemetryEnabled, false) // Default value for MongoDB - v.SetDefault("mongodb.url", DefaultMongoUrl) - v.SetDefault("mongodb.connection_timeout", DefaultConnectionTimeout) + v.SetDefault(MongoUrl, DefaultMongoUrl) + v.SetDefault(MongoConnectionTimeout, DefaultConnectionTimeout) // Defaults values for JanusGraph - v.SetDefault("janusgraph.url", DefaultJanusGraphUrl) - v.SetDefault("janusgraph.connection_timeout", DefaultConnectionTimeout) + v.SetDefault(JanusGraphUrl, DefaultJanusGraphUrl) + v.SetDefault(JanusGrapTimeout, DefaultConnectionTimeout) // Profiler values v.SetDefault(TelemetryProfilerPeriod, DefaultProfilerPeriod) @@ -132,7 +132,7 @@ func SetDefaultValues(v *viper.Viper) { v.SetDefault(IngestorAPIEndpoint, DefaultIngestorAPIEndpoint) v.SetDefault(IngestorAPIInsecure, DefaultIngestorAPIInsecure) - v.SetDefault(IngestorBlobBucketName, DefaultBucketName) + v.SetDefault(IngestorBlobBucketURL, DefaultBucketName) v.SetDefault(IngestorTempDir, DefaultTempDir) v.SetDefault(IngestorMaxArchiveSize, DefaultMaxArchiveSize) v.SetDefault(IngestorArchiveName, DefaultArchiveName) @@ -149,6 +149,17 @@ func SetEnvOverrides(c *viper.Viper) { res = multierror.Append(res, c.BindEnv("collector.file.directory", "KH_COLLECTOR_DIR")) res = multierror.Append(res, c.BindEnv("collector.file.cluster", "KH_COLLECTOR_TARGET")) + res = multierror.Append(res, c.BindEnv(MongoUrl, "KH_MONGODB_URL")) + res = multierror.Append(res, c.BindEnv(JanusGraphUrl, "KH_JANUSGRAPH_URL")) + + res = multierror.Append(res, c.BindEnv(IngestorAPIEndpoint, "KH_INGESTOR_API_ENDPOINT")) + res = multierror.Append(res, c.BindEnv(IngestorAPIInsecure, "KH_INGESTOR_API_INSECURE")) + res = multierror.Append(res, c.BindEnv(IngestorBlobBucketURL, "KH_INGESTOR_BUCKET_URL")) + res = multierror.Append(res, c.BindEnv(IngestorTempDir, "KH_INGESTOR_TEMP_DIR")) + res = multierror.Append(res, c.BindEnv(IngestorMaxArchiveSize, "KH_INGESTOR_MAX_ARCHIVE_SIZE")) + res = multierror.Append(res, c.BindEnv(IngestorArchiveName, "KH_INGESTOR_ARCHIVE_NAME")) + res = multierror.Append(res, c.BindEnv(IngestorBlobRegion, "KH_INGESTOR_REGION")) + if res.ErrorOrNil() != nil { log.I.Fatalf("config environment override: %v", res.ErrorOrNil()) } @@ -234,6 +245,8 @@ func NewEmbedConfig(v *viper.Viper, configPath string) (*KubehoundConfig, error) v.SetConfigType(DefaultConfigType) SetDefaultValues(v) + // Configure environment variable override + SetEnvOverrides(v) data, err := embedconfig.F.ReadFile(configPath) if err != nil { return nil, fmt.Errorf("reading embed config: %w", err) diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 203f92468..1d10ebcda 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -81,8 +81,8 @@ func TestMustLoadConfig(t *testing.T) { Insecure: false, }, Blob: &BlobConfig{ - Bucket: "", - Region: "", + BucketUrl: "", + Region: "", }, TempDir: "/tmp/kubehound", ArchiveName: "archive.tar.gz", @@ -155,8 +155,8 @@ func TestMustLoadConfig(t *testing.T) { Insecure: false, }, Blob: &BlobConfig{ - Bucket: "", - Region: "", + BucketUrl: "", + Region: "", }, TempDir: "/tmp/kubehound", ArchiveName: "archive.tar.gz", diff --git a/pkg/config/ingestor.go b/pkg/config/ingestor.go index 91c9c2c16..f14390f9b 100644 --- a/pkg/config/ingestor.go +++ b/pkg/config/ingestor.go @@ -14,8 +14,8 @@ const ( IngestorTempDir = "ingestor.temp_dir" IngestorArchiveName = "ingestor.archive_name" - IngestorBlobBucketName = "ingestor.blob.bucket_name" - IngestorBlobRegion = "ingestor.blob.region" + IngestorBlobBucketURL = "ingestor.blob.bucket_url" + IngestorBlobRegion = "ingestor.blob.region" ) type IngestorConfig struct { diff --git a/pkg/config/janusgraph.go b/pkg/config/janusgraph.go index 52db1a58f..726c0dd43 100644 --- a/pkg/config/janusgraph.go +++ b/pkg/config/janusgraph.go @@ -6,6 +6,9 @@ import ( const ( DefaultJanusGraphUrl = "ws://localhost:8182/gremlin" + + JanusGraphUrl = "janusgraph.url" + JanusGrapTimeout = "janusgraph.connection_timeout" ) // JanusGraphConfig configures JanusGraph specific parameters. diff --git a/pkg/config/mongodb.go b/pkg/config/mongodb.go index 89a63719b..f29a684d2 100644 --- a/pkg/config/mongodb.go +++ b/pkg/config/mongodb.go @@ -6,6 +6,9 @@ import ( const ( DefaultMongoUrl = "mongodb://localhost:27017" + + MongoUrl = "mongodb.url" + MongoConnectionTimeout = "mongodb.connection_timeout" ) // MongoDBConfig configures mongodb specific parameters. diff --git a/pkg/ingestor/puller/blob/blob.go b/pkg/ingestor/puller/blob/blob.go index 5a11cd1c6..9e1109a16 100644 --- a/pkg/ingestor/puller/blob/blob.go +++ b/pkg/ingestor/puller/blob/blob.go @@ -39,12 +39,12 @@ type BlobStore struct { var _ puller.DataPuller = (*BlobStore)(nil) func NewBlobStorage(cfg *config.KubehoundConfig, blobConfig *config.BlobConfig) (*BlobStore, error) { - if blobConfig.Bucket == "" { + if blobConfig.BucketUrl == "" { return nil, ErrInvalidBucketName } return &BlobStore{ - bucketName: blobConfig.Bucket, + bucketName: blobConfig.BucketUrl, cfg: cfg, region: blobConfig.Region, }, nil diff --git a/pkg/ingestor/puller/blob/blob_test.go b/pkg/ingestor/puller/blob/blob_test.go index 6e1305dba..aef9f1765 100644 --- a/pkg/ingestor/puller/blob/blob_test.go +++ b/pkg/ingestor/puller/blob/blob_test.go @@ -339,7 +339,7 @@ func TestNewBlobStorage(t *testing.T) { name: "empty bucket name", args: args{ blobConfig: &config.BlobConfig{ - Bucket: "", + BucketUrl: "", }, cfg: &config.KubehoundConfig{ Ingestor: config.IngestorConfig{ @@ -353,7 +353,7 @@ func TestNewBlobStorage(t *testing.T) { name: "valid blob storage", args: args{ blobConfig: &config.BlobConfig{ - Bucket: "fakeBlobStorage", + BucketUrl: "fakeBlobStorage", }, cfg: &config.KubehoundConfig{ Ingestor: config.IngestorConfig{ diff --git a/pkg/kubehound/storage/retrier.go b/pkg/kubehound/storage/retrier.go index c58cfa6b6..0dc82db7b 100644 --- a/pkg/kubehound/storage/retrier.go +++ b/pkg/kubehound/storage/retrier.go @@ -5,6 +5,7 @@ import ( "time" "github.com/DataDog/KubeHound/pkg/config" + "github.com/DataDog/KubeHound/pkg/telemetry/log" ) type Connector[T any] func(ctx context.Context, cfg *config.KubehoundConfig) (T, error) @@ -17,6 +18,7 @@ func Retrier[T any](connector Connector[T], retries int, delay time.Duration) Co if err == nil || r >= retries { return provider, err } + log.I.Warnf("Retrying to connect [%d/%d]", r+1, retries) select { case <-time.After(delay): diff --git a/test/system/kubehound_dump.yaml b/test/system/kubehound_dump.yaml index 4398162c2..892ab4e55 100644 --- a/test/system/kubehound_dump.yaml +++ b/test/system/kubehound_dump.yaml @@ -29,11 +29,11 @@ builder: # Ingestor configuration (for KHaaS) ingestor: blob: - bucket: "" # (i.e.: s3://) + bucket_url: "" # (i.e.: s3://) region: "" # (i.e.: us-west-2) temp_dir: "/tmp/kubehound" archive_name: "archive.tar.gz" max_archive_size: 2147483648 # 2GB api: # GRPC endpoint for the ingestor endpoint: "127.0.0.1:9000" - insecure: true \ No newline at end of file + insecure: true diff --git a/test/system/setup_test.go b/test/system/setup_test.go index d949acf8c..d7e133932 100644 --- a/test/system/setup_test.go +++ b/test/system/setup_test.go @@ -193,7 +193,7 @@ func RunGRPC(ctx context.Context, runArgs *runArgs, p *providers.ProvidersFactor log.I.Fatal(err.Error()) } - khCfg.Ingestor.Blob.Bucket = fmt.Sprintf("file://%s", fileFolder) + khCfg.Ingestor.Blob.BucketUrl = fmt.Sprintf("file://%s", fileFolder) log.I.Info("Creating Blob Storage provider") puller, err := blob.NewBlobStorage(khCfg, khCfg.Ingestor.Blob) if err != nil {