From 8e9c1de891922801b6672c84595e3684edcf6ee1 Mon Sep 17 00:00:00 2001 From: jt-dd Date: Fri, 13 Sep 2024 13:45:31 +0200 Subject: [PATCH 1/4] add env variable for ingestor/grpc image --- pkg/config/collector.go | 4 ++-- pkg/config/config.go | 21 +++++++++++++++++---- pkg/config/config_test.go | 8 ++++---- pkg/config/janusgraph.go | 3 +++ pkg/config/mongodb.go | 3 +++ pkg/ingestor/puller/blob/blob.go | 4 ++-- pkg/ingestor/puller/blob/blob_test.go | 4 ++-- pkg/kubehound/storage/retrier.go | 2 ++ test/system/kubehound_dump.yaml | 4 ++-- test/system/setup_test.go | 2 +- 10 files changed, 38 insertions(+), 17 deletions(-) diff --git a/pkg/config/collector.go b/pkg/config/collector.go index 1f0a1bb73..574944be9 100644 --- a/pkg/config/collector.go +++ b/pkg/config/collector.go @@ -50,6 +50,6 @@ type FileArchiveConfig struct { } type BlobConfig struct { - Bucket string `mapstructure:"bucket"` // Bucket to use to push k8s resources (e.g.: s3://) - Region string `mapstructure:"region"` // Region to use for the bucket (only for s3) + BucketName string `mapstructure:"bucket_name"` // Bucket to use to push k8s resources (e.g.: s3://) + Region string `mapstructure:"region"` // Region to use for the bucket (only for s3) } diff --git a/pkg/config/config.go b/pkg/config/config.go index 2cad68b47..12c36e367 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -108,12 +108,12 @@ func SetDefaultValues(v *viper.Viper) { v.SetDefault(TelemetryEnabled, false) // Default value for MongoDB - v.SetDefault("mongodb.url", DefaultMongoUrl) - v.SetDefault("mongodb.connection_timeout", DefaultConnectionTimeout) + v.SetDefault(MongoUrl, DefaultMongoUrl) + v.SetDefault(MongoConnectionTimeout, DefaultConnectionTimeout) // Defaults values for JanusGraph - v.SetDefault("janusgraph.url", DefaultJanusGraphUrl) - v.SetDefault("janusgraph.connection_timeout", DefaultConnectionTimeout) + v.SetDefault(JanusGraphUrl, DefaultJanusGraphUrl) + v.SetDefault(JanusGrapTimeout, DefaultConnectionTimeout) // Profiler values v.SetDefault(TelemetryProfilerPeriod, DefaultProfilerPeriod) @@ -149,6 +149,17 @@ func SetEnvOverrides(c *viper.Viper) { res = multierror.Append(res, c.BindEnv("collector.file.directory", "KH_COLLECTOR_DIR")) res = multierror.Append(res, c.BindEnv("collector.file.cluster", "KH_COLLECTOR_TARGET")) + res = multierror.Append(res, c.BindEnv(MongoUrl, "KH_MONGODB_URL")) + res = multierror.Append(res, c.BindEnv(JanusGraphUrl, "KH_JANUSGRAPH_URL")) + + res = multierror.Append(res, c.BindEnv(IngestorAPIEndpoint, "KH_INGESTOR_API_ENDPOINT")) + res = multierror.Append(res, c.BindEnv(IngestorAPIInsecure, "KH_INGESTOR_API_INSECURE")) + res = multierror.Append(res, c.BindEnv(IngestorBlobBucketName, "KH_INGESTOR_BUCKET_NAME")) + res = multierror.Append(res, c.BindEnv(IngestorTempDir, "KH_INGESTOR_TEMP_DIR")) + res = multierror.Append(res, c.BindEnv(IngestorMaxArchiveSize, "KH_INGESTOR_MAX_ARCHIVE_SIZE")) + res = multierror.Append(res, c.BindEnv(IngestorArchiveName, "KH_INGESTOR_ARCHIVE_NAME")) + res = multierror.Append(res, c.BindEnv(IngestorBlobRegion, "KH_INGESTOR_REGION")) + if res.ErrorOrNil() != nil { log.I.Fatalf("config environment override: %v", res.ErrorOrNil()) } @@ -234,6 +245,8 @@ func NewEmbedConfig(v *viper.Viper, configPath string) (*KubehoundConfig, error) v.SetConfigType(DefaultConfigType) SetDefaultValues(v) + // Configure environment variable override + SetEnvOverrides(v) data, err := embedconfig.F.ReadFile(configPath) if err != nil { return nil, fmt.Errorf("reading embed config: %w", err) diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 203f92468..1728afb5a 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -81,8 +81,8 @@ func TestMustLoadConfig(t *testing.T) { Insecure: false, }, Blob: &BlobConfig{ - Bucket: "", - Region: "", + BucketName: "", + Region: "", }, TempDir: "/tmp/kubehound", ArchiveName: "archive.tar.gz", @@ -155,8 +155,8 @@ func TestMustLoadConfig(t *testing.T) { Insecure: false, }, Blob: &BlobConfig{ - Bucket: "", - Region: "", + BucketName: "", + Region: "", }, TempDir: "/tmp/kubehound", ArchiveName: "archive.tar.gz", diff --git a/pkg/config/janusgraph.go b/pkg/config/janusgraph.go index 52db1a58f..726c0dd43 100644 --- a/pkg/config/janusgraph.go +++ b/pkg/config/janusgraph.go @@ -6,6 +6,9 @@ import ( const ( DefaultJanusGraphUrl = "ws://localhost:8182/gremlin" + + JanusGraphUrl = "janusgraph.url" + JanusGrapTimeout = "janusgraph.connection_timeout" ) // JanusGraphConfig configures JanusGraph specific parameters. diff --git a/pkg/config/mongodb.go b/pkg/config/mongodb.go index 89a63719b..f29a684d2 100644 --- a/pkg/config/mongodb.go +++ b/pkg/config/mongodb.go @@ -6,6 +6,9 @@ import ( const ( DefaultMongoUrl = "mongodb://localhost:27017" + + MongoUrl = "mongodb.url" + MongoConnectionTimeout = "mongodb.connection_timeout" ) // MongoDBConfig configures mongodb specific parameters. diff --git a/pkg/ingestor/puller/blob/blob.go b/pkg/ingestor/puller/blob/blob.go index 5a11cd1c6..4000aac92 100644 --- a/pkg/ingestor/puller/blob/blob.go +++ b/pkg/ingestor/puller/blob/blob.go @@ -39,12 +39,12 @@ type BlobStore struct { var _ puller.DataPuller = (*BlobStore)(nil) func NewBlobStorage(cfg *config.KubehoundConfig, blobConfig *config.BlobConfig) (*BlobStore, error) { - if blobConfig.Bucket == "" { + if blobConfig.BucketName == "" { return nil, ErrInvalidBucketName } return &BlobStore{ - bucketName: blobConfig.Bucket, + bucketName: blobConfig.BucketName, cfg: cfg, region: blobConfig.Region, }, nil diff --git a/pkg/ingestor/puller/blob/blob_test.go b/pkg/ingestor/puller/blob/blob_test.go index 6e1305dba..d97963418 100644 --- a/pkg/ingestor/puller/blob/blob_test.go +++ b/pkg/ingestor/puller/blob/blob_test.go @@ -339,7 +339,7 @@ func TestNewBlobStorage(t *testing.T) { name: "empty bucket name", args: args{ blobConfig: &config.BlobConfig{ - Bucket: "", + BucketName: "", }, cfg: &config.KubehoundConfig{ Ingestor: config.IngestorConfig{ @@ -353,7 +353,7 @@ func TestNewBlobStorage(t *testing.T) { name: "valid blob storage", args: args{ blobConfig: &config.BlobConfig{ - Bucket: "fakeBlobStorage", + BucketName: "fakeBlobStorage", }, cfg: &config.KubehoundConfig{ Ingestor: config.IngestorConfig{ diff --git a/pkg/kubehound/storage/retrier.go b/pkg/kubehound/storage/retrier.go index c58cfa6b6..9ca942fc4 100644 --- a/pkg/kubehound/storage/retrier.go +++ b/pkg/kubehound/storage/retrier.go @@ -5,6 +5,7 @@ import ( "time" "github.com/DataDog/KubeHound/pkg/config" + "github.com/DataDog/KubeHound/pkg/telemetry/log" ) type Connector[T any] func(ctx context.Context, cfg *config.KubehoundConfig) (T, error) @@ -13,6 +14,7 @@ func Retrier[T any](connector Connector[T], retries int, delay time.Duration) Co return func(ctx context.Context, cfg *config.KubehoundConfig) (T, error) { for r := 0; ; r++ { var empty T + log.I.Warnf("Trying to connect [%d/%d]", r, retries) provider, err := connector(ctx, cfg) if err == nil || r >= retries { return provider, err diff --git a/test/system/kubehound_dump.yaml b/test/system/kubehound_dump.yaml index 4398162c2..a2614bb74 100644 --- a/test/system/kubehound_dump.yaml +++ b/test/system/kubehound_dump.yaml @@ -29,11 +29,11 @@ builder: # Ingestor configuration (for KHaaS) ingestor: blob: - bucket: "" # (i.e.: s3://) + bucket_name: "" # (i.e.: s3://) region: "" # (i.e.: us-west-2) temp_dir: "/tmp/kubehound" archive_name: "archive.tar.gz" max_archive_size: 2147483648 # 2GB api: # GRPC endpoint for the ingestor endpoint: "127.0.0.1:9000" - insecure: true \ No newline at end of file + insecure: true diff --git a/test/system/setup_test.go b/test/system/setup_test.go index d949acf8c..8224472ed 100644 --- a/test/system/setup_test.go +++ b/test/system/setup_test.go @@ -193,7 +193,7 @@ func RunGRPC(ctx context.Context, runArgs *runArgs, p *providers.ProvidersFactor log.I.Fatal(err.Error()) } - khCfg.Ingestor.Blob.Bucket = fmt.Sprintf("file://%s", fileFolder) + khCfg.Ingestor.Blob.BucketName = fmt.Sprintf("file://%s", fileFolder) log.I.Info("Creating Blob Storage provider") puller, err := blob.NewBlobStorage(khCfg, khCfg.Ingestor.Blob) if err != nil { From 1a83fe3d966a6c6d7432ef617eb8a6812953a0b5 Mon Sep 17 00:00:00 2001 From: jt-dd Date: Fri, 13 Sep 2024 14:31:11 +0200 Subject: [PATCH 2/4] change bucket_name to bucket_url --- configs/etc/kubehound-reference.yaml | 2 +- configs/etc/kubehound.yaml | 10 +++++----- deployments/k8s/khaas/conf/ingestor/kubehound.yaml | 6 +++--- deployments/k8s/khaas/values.yaml | 2 +- docs/user-guide/khaas-101.md | 2 +- pkg/config/collector.go | 4 ++-- pkg/config/config.go | 4 ++-- pkg/config/config_test.go | 8 ++++---- pkg/config/ingestor.go | 4 ++-- pkg/ingestor/puller/blob/blob.go | 4 ++-- pkg/ingestor/puller/blob/blob_test.go | 4 ++-- test/system/kubehound_dump.yaml | 2 +- test/system/setup_test.go | 2 +- 13 files changed, 27 insertions(+), 27 deletions(-) diff --git a/configs/etc/kubehound-reference.yaml b/configs/etc/kubehound-reference.yaml index 92a5a1ec3..f1328644f 100644 --- a/configs/etc/kubehound-reference.yaml +++ b/configs/etc/kubehound-reference.yaml @@ -125,7 +125,7 @@ builder: # ingestor: # blob: # # (i.e.: s3://) -# bucket: "" +# bucket_url: "" # # (i.e.: us-east-1) # region: "" # temp_dir: "/tmp/kubehound" diff --git a/configs/etc/kubehound.yaml b/configs/etc/kubehound.yaml index b36c66c79..7271f7813 100644 --- a/configs/etc/kubehound.yaml +++ b/configs/etc/kubehound.yaml @@ -50,10 +50,10 @@ builder: # Batch size for edge inserts batch_size: 500 - + # Cluster impact batch size for edge inserts batch_size_cluster_impact: 10 - + # Enable for large clusters to prevent number of edges growing exponentially large_cluster_optimizations: true @@ -61,13 +61,13 @@ builder: ingestor: blob: # (i.e.: s3://) - bucket: "" + bucket_url: "" # (i.e.: us-east-1) - region: "" + region: "" temp_dir: "/tmp/kubehound" archive_name: "archive.tar.gz" max_archive_size: 2147483648 # 2GB # GRPC endpoint for the ingestor - api: + api: endpoint: "127.0.0.1:9000" insecure: true diff --git a/deployments/k8s/khaas/conf/ingestor/kubehound.yaml b/deployments/k8s/khaas/conf/ingestor/kubehound.yaml index 6074ad0ca..339366442 100644 --- a/deployments/k8s/khaas/conf/ingestor/kubehound.yaml +++ b/deployments/k8s/khaas/conf/ingestor/kubehound.yaml @@ -18,7 +18,7 @@ collector: # General storage configuration storage: - # Whether or not to wipe all data on startup + # Whether or not to wipe all data on startup wipe: false # Number of connection retries before declaring an error @@ -61,7 +61,7 @@ builder: # Batch size for edge inserts batch_size: 1000 - + # Cluster impact batch size for edge inserts batch_size_cluster_impact: 10 @@ -70,7 +70,7 @@ builder: ingestor: blob: - bucket: "{{ $.Values.services.ingestor.bucket }}" + bucket_url: "{{ $.Values.services.ingestor.bucket_url }}" region: "{{ $.Values.services.ingestor.region }}" temp_dir: "/tmp/kubehound" archive_name: "archive.tar.gz" diff --git a/deployments/k8s/khaas/values.yaml b/deployments/k8s/khaas/values.yaml index c0897b8bf..0e05af021 100644 --- a/deployments/k8s/khaas/values.yaml +++ b/deployments/k8s/khaas/values.yaml @@ -3,7 +3,7 @@ services: ingestor: image: ghcr.io/datadog/kubehound-binary version: latest - bucket: s3:// + bucket_url: s3:// region: "us-east-1" resources: requests: diff --git a/docs/user-guide/khaas-101.md b/docs/user-guide/khaas-101.md index 570788c72..ade1ecb21 100644 --- a/docs/user-guide/khaas-101.md +++ b/docs/user-guide/khaas-101.md @@ -93,7 +93,7 @@ If you don't want to specify the bucket every time, you can set it up in your lo ingestor: blob: # (i.e.: s3://) - bucket: "" + bucket_url: "" # (i.e.: us-east-1) region: "" ``` diff --git a/pkg/config/collector.go b/pkg/config/collector.go index 574944be9..14855c465 100644 --- a/pkg/config/collector.go +++ b/pkg/config/collector.go @@ -50,6 +50,6 @@ type FileArchiveConfig struct { } type BlobConfig struct { - BucketName string `mapstructure:"bucket_name"` // Bucket to use to push k8s resources (e.g.: s3://) - Region string `mapstructure:"region"` // Region to use for the bucket (only for s3) + BucketUrl string `mapstructure:"bucket_url"` // Bucket to use to push k8s resources (e.g.: s3://) + Region string `mapstructure:"region"` // Region to use for the bucket (only for s3) } diff --git a/pkg/config/config.go b/pkg/config/config.go index 12c36e367..1623193e6 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -132,7 +132,7 @@ func SetDefaultValues(v *viper.Viper) { v.SetDefault(IngestorAPIEndpoint, DefaultIngestorAPIEndpoint) v.SetDefault(IngestorAPIInsecure, DefaultIngestorAPIInsecure) - v.SetDefault(IngestorBlobBucketName, DefaultBucketName) + v.SetDefault(IngestorBlobBucketURL, DefaultBucketName) v.SetDefault(IngestorTempDir, DefaultTempDir) v.SetDefault(IngestorMaxArchiveSize, DefaultMaxArchiveSize) v.SetDefault(IngestorArchiveName, DefaultArchiveName) @@ -154,7 +154,7 @@ func SetEnvOverrides(c *viper.Viper) { res = multierror.Append(res, c.BindEnv(IngestorAPIEndpoint, "KH_INGESTOR_API_ENDPOINT")) res = multierror.Append(res, c.BindEnv(IngestorAPIInsecure, "KH_INGESTOR_API_INSECURE")) - res = multierror.Append(res, c.BindEnv(IngestorBlobBucketName, "KH_INGESTOR_BUCKET_NAME")) + res = multierror.Append(res, c.BindEnv(IngestorBlobBucketURL, "KH_INGESTOR_BUCKET_URL")) res = multierror.Append(res, c.BindEnv(IngestorTempDir, "KH_INGESTOR_TEMP_DIR")) res = multierror.Append(res, c.BindEnv(IngestorMaxArchiveSize, "KH_INGESTOR_MAX_ARCHIVE_SIZE")) res = multierror.Append(res, c.BindEnv(IngestorArchiveName, "KH_INGESTOR_ARCHIVE_NAME")) diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 1728afb5a..1d10ebcda 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -81,8 +81,8 @@ func TestMustLoadConfig(t *testing.T) { Insecure: false, }, Blob: &BlobConfig{ - BucketName: "", - Region: "", + BucketUrl: "", + Region: "", }, TempDir: "/tmp/kubehound", ArchiveName: "archive.tar.gz", @@ -155,8 +155,8 @@ func TestMustLoadConfig(t *testing.T) { Insecure: false, }, Blob: &BlobConfig{ - BucketName: "", - Region: "", + BucketUrl: "", + Region: "", }, TempDir: "/tmp/kubehound", ArchiveName: "archive.tar.gz", diff --git a/pkg/config/ingestor.go b/pkg/config/ingestor.go index 91c9c2c16..f14390f9b 100644 --- a/pkg/config/ingestor.go +++ b/pkg/config/ingestor.go @@ -14,8 +14,8 @@ const ( IngestorTempDir = "ingestor.temp_dir" IngestorArchiveName = "ingestor.archive_name" - IngestorBlobBucketName = "ingestor.blob.bucket_name" - IngestorBlobRegion = "ingestor.blob.region" + IngestorBlobBucketURL = "ingestor.blob.bucket_url" + IngestorBlobRegion = "ingestor.blob.region" ) type IngestorConfig struct { diff --git a/pkg/ingestor/puller/blob/blob.go b/pkg/ingestor/puller/blob/blob.go index 4000aac92..9e1109a16 100644 --- a/pkg/ingestor/puller/blob/blob.go +++ b/pkg/ingestor/puller/blob/blob.go @@ -39,12 +39,12 @@ type BlobStore struct { var _ puller.DataPuller = (*BlobStore)(nil) func NewBlobStorage(cfg *config.KubehoundConfig, blobConfig *config.BlobConfig) (*BlobStore, error) { - if blobConfig.BucketName == "" { + if blobConfig.BucketUrl == "" { return nil, ErrInvalidBucketName } return &BlobStore{ - bucketName: blobConfig.BucketName, + bucketName: blobConfig.BucketUrl, cfg: cfg, region: blobConfig.Region, }, nil diff --git a/pkg/ingestor/puller/blob/blob_test.go b/pkg/ingestor/puller/blob/blob_test.go index d97963418..aef9f1765 100644 --- a/pkg/ingestor/puller/blob/blob_test.go +++ b/pkg/ingestor/puller/blob/blob_test.go @@ -339,7 +339,7 @@ func TestNewBlobStorage(t *testing.T) { name: "empty bucket name", args: args{ blobConfig: &config.BlobConfig{ - BucketName: "", + BucketUrl: "", }, cfg: &config.KubehoundConfig{ Ingestor: config.IngestorConfig{ @@ -353,7 +353,7 @@ func TestNewBlobStorage(t *testing.T) { name: "valid blob storage", args: args{ blobConfig: &config.BlobConfig{ - BucketName: "fakeBlobStorage", + BucketUrl: "fakeBlobStorage", }, cfg: &config.KubehoundConfig{ Ingestor: config.IngestorConfig{ diff --git a/test/system/kubehound_dump.yaml b/test/system/kubehound_dump.yaml index a2614bb74..892ab4e55 100644 --- a/test/system/kubehound_dump.yaml +++ b/test/system/kubehound_dump.yaml @@ -29,7 +29,7 @@ builder: # Ingestor configuration (for KHaaS) ingestor: blob: - bucket_name: "" # (i.e.: s3://) + bucket_url: "" # (i.e.: s3://) region: "" # (i.e.: us-west-2) temp_dir: "/tmp/kubehound" archive_name: "archive.tar.gz" diff --git a/test/system/setup_test.go b/test/system/setup_test.go index 8224472ed..d7e133932 100644 --- a/test/system/setup_test.go +++ b/test/system/setup_test.go @@ -193,7 +193,7 @@ func RunGRPC(ctx context.Context, runArgs *runArgs, p *providers.ProvidersFactor log.I.Fatal(err.Error()) } - khCfg.Ingestor.Blob.BucketName = fmt.Sprintf("file://%s", fileFolder) + khCfg.Ingestor.Blob.BucketUrl = fmt.Sprintf("file://%s", fileFolder) log.I.Info("Creating Blob Storage provider") puller, err := blob.NewBlobStorage(khCfg, khCfg.Ingestor.Blob) if err != nil { From 3e03ccdc6e0ba3a3da5e242f563bb5eaa69e7057 Mon Sep 17 00:00:00 2001 From: jt-dd Date: Fri, 13 Sep 2024 14:37:51 +0200 Subject: [PATCH 3/4] warning message for retrying connection --- pkg/kubehound/storage/retrier.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/kubehound/storage/retrier.go b/pkg/kubehound/storage/retrier.go index 9ca942fc4..a06a0b726 100644 --- a/pkg/kubehound/storage/retrier.go +++ b/pkg/kubehound/storage/retrier.go @@ -14,11 +14,11 @@ func Retrier[T any](connector Connector[T], retries int, delay time.Duration) Co return func(ctx context.Context, cfg *config.KubehoundConfig) (T, error) { for r := 0; ; r++ { var empty T - log.I.Warnf("Trying to connect [%d/%d]", r, retries) provider, err := connector(ctx, cfg) if err == nil || r >= retries { return provider, err } + log.I.Warnf("Retrying to connect [%d/%d]", r, retries) select { case <-time.After(delay): From 7d71c18c4cfbb5877c32212c5d28ccb38ae1e7e6 Mon Sep 17 00:00:00 2001 From: jt-dd Date: Fri, 13 Sep 2024 14:38:42 +0200 Subject: [PATCH 4/4] typo --- pkg/kubehound/storage/retrier.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/kubehound/storage/retrier.go b/pkg/kubehound/storage/retrier.go index a06a0b726..0dc82db7b 100644 --- a/pkg/kubehound/storage/retrier.go +++ b/pkg/kubehound/storage/retrier.go @@ -18,7 +18,7 @@ func Retrier[T any](connector Connector[T], retries int, delay time.Duration) Co if err == nil || r >= retries { return provider, err } - log.I.Warnf("Retrying to connect [%d/%d]", r, retries) + log.I.Warnf("Retrying to connect [%d/%d]", r+1, retries) select { case <-time.After(delay):