fix sync_partition_metadata
maskarb committed Jan 22, 2025
1 parent 7c12ccf commit 7ee48e4
Showing 9 changed files with 210 additions and 52 deletions.
21 changes: 13 additions & 8 deletions .env.example
@@ -4,8 +4,8 @@ DATABASE_NAME=postgres
PGADMIN_EMAIL=[email protected]
PGADMIN_PASSWORD=postgres
PGADMIN_PORT=8432
POSTGRES_SQL_SERVICE_HOST=localhost
POSTGRES_SQL_SERVICE_PORT=15432
POSTGRES_SQL_SERVICE_HOST=db
POSTGRES_SQL_SERVICE_PORT=5432
DATABASE_USER=postgres
DATABASE_ADMIN=postgres
DATABASE_PASSWORD=postgres
@@ -32,18 +32,23 @@ TAG_ENABLED_LIMIT=200 # Set the max amount of tags per account
DELAYED_TASK_TIME=30 # Set the seconds before a delayed summary task should expire
DELAYED_TASK_POLLING_MINUTES=5 # Set the time before the delayed task kicks off.

ENABLE_S3_ARCHIVING=True
S3_BUCKET_NAME=koku-bucket
S3_BUCKET_PATH=data
S3_ENDPOINT=http://koku-minio:9000
S3_ACCESS_KEY=kokuminioaccess
S3_SECRET=kokuminiosecret
SKIP_MINIO_DATA_DELETION=False

# AWS
AWS_SHARED_CREDENTIALS_FILE=/etc/credentials/aws
AWS_RESOURCE_NAME=YOUR_COST_MANAGEMENT_AWS_ARN

# Glue
SCHEMA_SUFFIX="" # if DEVELOPMENT=True, this can be left empty and will default to $USER; otherwise, set this value to something unique

AWS_CATALOG_ID=589173575009
S3_ENDPOINT=https://s3.us-east-1.amazonaws.com

S3_BUCKET_NAME=hccm-local-s3
S3_ACCESS_KEY=CHANGEME
S3_SECRET=CHANGEME
S3_REGION=us-east-1

# GCP
GOOGLE_APPLICATION_CREDENTIALS=/etc/credentials/gcp
GCP_DATASET='dataset_example'
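A minimal sketch (not part of this commit) of exporting the new Glue/S3 variables for local development. Values mirror the .env.example defaults above, the credentials are placeholders, and the fallback of SCHEMA_SUFFIX to $USER is assumed from the comment on that variable.

# Hypothetical local-dev setup mirroring the variables introduced above.
export SCHEMA_SUFFIX="${SCHEMA_SUFFIX:-$USER}"    # defaults to $USER when DEVELOPMENT=True, per the comment
export AWS_CATALOG_ID=589173575009
export S3_ENDPOINT=https://s3.us-east-1.amazonaws.com
export S3_BUCKET_NAME=hccm-local-s3
export S3_REGION=us-east-1
export S3_ACCESS_KEY=CHANGEME    # replace with real credentials
export S3_SECRET=CHANGEME        # replace with real credentials
# The tenant schema used by load_test_customer_data.sh below becomes org1234567${SCHEMA_SUFFIX}.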
116 changes: 116 additions & 0 deletions dev/containers/hive-metastore/metastore-site.xml
@@ -0,0 +1,116 @@

<configuration>
<property>
<name>metastore.thrift.port</name>
<value>8000</value>
<description>Hive metastore listener port</description>
</property>
<property>
<name>metastore.thrift.uris</name>
<value>thrift://${env.HOSTNAME}:8000</value>
<description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description>
</property>
<property>
<name>metastore.metrics.enabled</name>
<value>true</value>
<description>Enable metrics on the metastore.</description>
</property>
<property>
<name>metastore.metrics.reporters</name>
<value>jmx</value>
<description>A comma separated list of metrics reporters to start</description>
</property>
<property>
<name>datanucleus.autoStartMechanismMode</name>
<value>ignored</value>
<description>Autostart mechanism for datanucleus. Currently ignored is the only option supported.</description>
</property>
<property>
<name>datanucleus.schema.autoCreateAll</name>
<value>false</value>
<description>Auto creates necessary schema on a startup if one doesn't exist. Set this to false, after creating it once. To enable auto create also set hive.metastore.schema.verification=false. Auto creation is not recommended for production use cases, run schematool command instead.</description>
</property>
<property>
<name>metastore.schema.verification</name>
<value>true</value>
<description>
Enforce metastore schema version consistency.
True: Verify that the version information stored in the metastore is compatible with that from the Hive jars. Also disable automatic
schema migration attempts; users are required to manually migrate the schema after a Hive upgrade, which ensures
proper metastore schema migration. (Default)
False: Warn if the version information stored in the metastore doesn't match that from the Hive jars.
</description>
</property>
<property>
<name>hive.default.fileformat</name>
<value>Parquet</value>
</property>
<property>
<name>fs.s3a.endpoint</name>
<description>AWS S3 endpoint to connect to.</description>
<value>${env.S3_ENDPOINT}</value>
</property>
<property>
<name>fs.s3a.access.key</name>
<description>AWS access key ID.</description>
<value>${env.S3_ACCESS_KEY}</value>
</property>
<property>
<name>fs.s3a.secret.key</name>
<description>AWS secret key.</description>
<value>${env.S3_SECRET}</value>
</property>
<property>
<name>fs.s3a.path.style.access</name>
<value>true</value>
<description>Enable S3 path style access.</description>
</property>
<property>
<name>metastore.warehouse.dir</name>
<value>s3a://${env.S3_BUCKET_NAME}/${env.S3_BUCKET_PATH}/</value>
</property>
<property>
<name>hive.metastore.db.type</name>
<value>postgres</value>
<description>
Expects one of [derby, oracle, mysql, mssql, postgres].
Type of database used by the metastore. Information schema &amp; JDBCStorageHandler depend on it.
</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>${env.DATABASE_USER}</value>
<description>Username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>${env.DATABASE_PASSWORD}</value>
<description>password to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:postgresql://${env.POSTGRES_SQL_SERVICE_HOST}:${env.POSTGRES_SQL_SERVICE_PORT}/${env.DATABASE_NAME}?sslmode=prefer</value>
<description>
JDBC connect string for a JDBC metastore.
To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL.
For example, jdbc:postgresql://myhost/db?ssl=true for postgres database.
</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>org.postgresql.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>hive.cluster.delegation.token.store.class</name>
<value>org.apache.hadoop.hive.thrift.DBTokenStore</value>
</property>
<property>
<name>metastore.task.threads.always</name>
<value>org.apache.hadoop.hive.metastore.events.EventCleanerTask</value>
</property>
<property>
<name>metastore.expression.proxy</name>
<value>org.apache.hadoop.hive.metastore.DefaultPartitionExpressionProxy</value>
</property>
</configuration>
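Hadoop-style configuration resolves ${env.NAME} references from the process environment, so this metastore-site.xml expects the variables below at container runtime. The values shown are illustrative, taken from .env.example where they exist; S3_BUCKET_PATH and HOSTNAME are assumptions about the container setup.

# Environment consumed by the ${env.*} placeholders in metastore-site.xml (illustrative values).
export S3_ENDPOINT=https://s3.us-east-1.amazonaws.com
export S3_ACCESS_KEY=CHANGEME
export S3_SECRET=CHANGEME
export S3_BUCKET_NAME=hccm-local-s3
export S3_BUCKET_PATH=data            # assumed; feeds metastore.warehouse.dir
export DATABASE_NAME=postgres
export DATABASE_USER=postgres
export DATABASE_PASSWORD=postgres
export POSTGRES_SQL_SERVICE_HOST=db
export POSTGRES_SQL_SERVICE_PORT=5432
# HOSTNAME is normally provided by the container runtime and feeds metastore.thrift.uris.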
21 changes: 21 additions & 0 deletions dev/containers/trino/etc/catalog/glue.properties
@@ -0,0 +1,21 @@
connector.name=hive
hive.metastore=glue
hive.storage-format=Parquet
hive.compression-codec=SNAPPY
hive.collect-column-statistics-on-write=true
hive.recursive-directories=true
hive.non-managed-table-writes-enabled=true

fs.hadoop.enabled=false
fs.native-s3.enabled=true
s3.region=${ENV:S3_REGION}
s3.endpoint=${ENV:S3_ENDPOINT}
s3.aws-access-key=${ENV:S3_ACCESS_KEY}
s3.aws-secret-key=${ENV:S3_SECRET}
s3.path-style-access=true

hive.metastore.glue.default-warehouse-dir=s3://${ENV:S3_BUCKET_NAME}/data
hive.metastore.glue.region=${ENV:S3_REGION}
hive.metastore.glue.aws-access-key=${ENV:S3_ACCESS_KEY}
hive.metastore.glue.aws-secret-key=${ENV:S3_SECRET}
hive.metastore.glue.catalogid=${ENV:AWS_CATALOG_ID}
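Since the commit title concerns sync_partition_metadata, here is a hedged example of calling the Hive connector's partition-sync procedure through this new glue catalog from the Trino CLI. The server URL, schema name, and table name are placeholders, not values taken from this repository.

# Illustrative only: localhost:8080, the schema, and the table are placeholders.
trino --server http://localhost:8080 --execute \
  "CALL glue.system.sync_partition_metadata(schema_name => 'org1234567', table_name => 'some_partitioned_table', mode => 'FULL')"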
4 changes: 2 additions & 2 deletions dev/containers/trino/etc/catalog/hive.properties
@@ -9,8 +9,8 @@ hive.parquet.use-column-names=true

fs.native-s3.enabled=true

s3.aws-access-key=${ENV:AWS_ACCESS_KEY_ID}
s3.aws-secret-key=${ENV:AWS_SECRET_ACCESS_KEY}
s3.aws-access-key=${ENV:S3_ACCESS_KEY}
s3.aws-secret-key=${ENV:S3_SECRET}
s3.endpoint=${ENV:S3_ENDPOINT}
s3.path-style-access=true
s3.region=us-east-1
2 changes: 1 addition & 1 deletion dev/containers/trino/etc/catalog/postgres.properties
@@ -1,5 +1,5 @@
connector.name=postgresql
connection-url=jdbc:postgresql://db:5432/${ENV:DATABASE_NAME}
connection-url=jdbc:postgresql://${ENV:POSTGRES_SQL_SERVICE_HOST}:${ENV:POSTGRES_SQL_SERVICE_PORT}/${ENV:DATABASE_NAME}
connection-user=${ENV:DATABASE_USER}
connection-password=${ENV:DATABASE_PASSWORD}
postgresql.array-mapping=AS_ARRAY
18 changes: 9 additions & 9 deletions dev/scripts/load_test_customer_data.sh
@@ -241,7 +241,7 @@ enable_ocp_tags() {
log-info "Enabling OCP tags..."
RESPONSE=$(curl -s -w "%{http_code}\n" --header "Content-Type: application/json" \
--request POST \
--data '{"schema": "org1234567","action": "create","tag_keys": ["environment", "app", "version", "storageclass", "application", "instance-type"], "provider_type": "ocp"}' \
--data '{"schema": "org1234567'"${SCHEMA_SUFFIX}"'","action": "create","tag_keys": ["environment", "app", "version", "storageclass", "application", "instance-type"], "provider_type": "ocp"}' \
"${MASU_URL_PREFIX}"/v1/enabled_tags/)
STATUS_CODE=${RESPONSE: -3}
DATA=${RESPONSE:: -3}
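For illustration, with a hypothetical SCHEMA_SUFFIX of _jdoe, the schema value in the payload above expands as follows (assumed output, simplified to omit tag_keys):

# SCHEMA_SUFFIX=_jdoe is hypothetical; the quoting mirrors the curl --data line above.
SCHEMA_SUFFIX=_jdoe
echo '{"schema": "org1234567'"${SCHEMA_SUFFIX}"'", "action": "create", "provider_type": "ocp"}'
# -> {"schema": "org1234567_jdoe", "action": "create", "provider_type": "ocp"}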
@@ -278,8 +278,8 @@ build_aws_data() {
render_yaml_files "${_yaml_files[@]}"

log-info "Building OpenShift on ${_source_name} report data..."
nise_report ocp --static-report-file "$YAML_PATH/ocp_on_aws/rendered_ocp_static_data.yml" --ocp-cluster-id my-ocp-cluster-1 --minio-upload http://localhost:9000 --daily-reports --payload-name "$_ocp_payload"
# nise_report ocp --static-report-file "$YAML_PATH/ocp_on_aws/rendered_ocp_static_data.yml" --ocp-cluster-id my-ocp-cluster-1 --minio-upload http://localhost:9000 --payload-name "$_ocp_payload"
nise_report ocp --static-report-file "$YAML_PATH/ocp_on_aws/rendered_ocp_static_data.yml" --ocp-cluster-id my-ocp-cluster-1 --minio-upload "${S3_ENDPOINT}" --daily-reports --payload-name "$_ocp_payload"
# nise_report ocp --static-report-file "$YAML_PATH/ocp_on_aws/rendered_ocp_static_data.yml" --ocp-cluster-id my-ocp-cluster-1 --minio-upload "${S3_ENDPOINT}" --payload-name "$_ocp_payload"
nise_report aws --static-report-file "$YAML_PATH/ocp_on_aws/rendered_aws_static_data.yml" --aws-s3-report-name None --aws-s3-bucket-name "$NISE_DATA_PATH/local_providers/aws_local"

log-info "Cleanup ${_source_name} rendered YAML files..."
@@ -314,8 +314,8 @@ build_azure_data() {
render_yaml_files "${_yaml_files[@]}"

log-info "Building OpenShift on ${_source_name} report data..."
nise_report ocp --static-report-file "$YAML_PATH/ocp_on_azure/rendered_ocp_static_data.yml" --ocp-cluster-id my-ocp-cluster-2 --minio-upload http://localhost:9000 --daily-reports --payload-name "$_ocp_payload"
# nise_report ocp --static-report-file "$YAML_PATH/ocp_on_azure/rendered_ocp_static_data.yml" --ocp-cluster-id my-ocp-cluster-2 --minio-upload http://localhost:9000 --payload-name "$_ocp_payload"
nise_report ocp --static-report-file "$YAML_PATH/ocp_on_azure/rendered_ocp_static_data.yml" --ocp-cluster-id my-ocp-cluster-2 --minio-upload "${S3_ENDPOINT}" --daily-reports --payload-name "$_ocp_payload"
# nise_report ocp --static-report-file "$YAML_PATH/ocp_on_azure/rendered_ocp_static_data.yml" --ocp-cluster-id my-ocp-cluster-2 --minio-upload "${S3_ENDPOINT}" --payload-name "$_ocp_payload"
nise_report azure --static-report-file "$YAML_PATH/ocp_on_azure/rendered_azure_static_data.yml" --azure-container-name "$NISE_DATA_PATH/local_providers/azure_local" --azure-report-name azure-report
nise_report azure --static-report-file "$YAML_PATH/rendered_azure_v2.yml" --azure-container-name "$NISE_DATA_PATH/local_providers/azure_local" --azure-report-name azure-report-v2 --resource-group

@@ -350,8 +350,8 @@ build_gcp_data() {
render_yaml_files "${_yaml_files[@]}"

log-info "Building OpenShift on ${_source_name} report data..."
nise_report ocp --static-report-file "$YAML_PATH/ocp_on_gcp/rendered_ocp_static_data.yml" --ocp-cluster-id test-ocp-gcp-cluster --minio-upload http://localhost:9000 --daily-reports --payload-name "$_ocp_payload"
# nise_report ocp --static-report-file "$YAML_PATH/ocp_on_gcp/rendered_ocp_static_data.yml" --ocp-cluster-id test-ocp-gcp-cluster --minio-upload http://localhost:9000 --payload-name "$_ocp_payload"
nise_report ocp --static-report-file "$YAML_PATH/ocp_on_gcp/rendered_ocp_static_data.yml" --ocp-cluster-id test-ocp-gcp-cluster --minio-upload "${S3_ENDPOINT}" --daily-reports --payload-name "$_ocp_payload"
# nise_report ocp --static-report-file "$YAML_PATH/ocp_on_gcp/rendered_ocp_static_data.yml" --ocp-cluster-id test-ocp-gcp-cluster --minio-upload "${S3_ENDPOINT}" --payload-name "$_ocp_payload"
nise_report gcp --static-report-file "$YAML_PATH/gcp/rendered_gcp_static_data.yml" --gcp-bucket-name "$NISE_DATA_PATH/local_providers/gcp_local"
nise_report gcp --static-report-file "$YAML_PATH/ocp_on_gcp/rendered_gcp_static_data.yml" --gcp-bucket-name "$NISE_DATA_PATH/local_providers/gcp_local_0" -r

@@ -379,8 +379,8 @@ build_onprem_data() {
render_yaml_files "${_yaml_files[@]}"

log-info "Building OpenShift on ${_source_name} report data..."
nise_report ocp --static-report-file "$YAML_PATH/ocp/rendered_ocp_on_premise.yml" --ocp-cluster-id my-ocp-cluster-3 --minio-upload http://localhost:9000 --daily-reports --payload-name "$_ocp_payload"
# nise_report ocp --static-report-file "$YAML_PATH/ocp/rendered_ocp_on_premise.yml" --ocp-cluster-id my-ocp-cluster-3 --minio-upload http://localhost:9000 --payload-name "$_ocp_payload"
nise_report ocp --static-report-file "$YAML_PATH/ocp/rendered_ocp_on_premise.yml" --ocp-cluster-id my-ocp-cluster-3 --minio-upload "${S3_ENDPOINT}" --daily-reports --payload-name "$_ocp_payload"
# nise_report ocp --static-report-file "$YAML_PATH/ocp/rendered_ocp_on_premise.yml" --ocp-cluster-id my-ocp-cluster-3 --minio-upload "${S3_ENDPOINT}" --payload-name "$_ocp_payload"

log-info "Cleanup ${_source_name} rendered YAML files..."
cleanup_rendered_files "${_rendered_yaml_files[@]}"
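An assumed invocation (not part of this diff) showing how the --minio-upload "${S3_ENDPOINT}" calls above pick up their value: load the .env settings into the environment before running the loader script.

# Assumed usage sketch: export the .env values, then run the data loader.
set -a; source .env; set +a
dev/scripts/load_test_customer_data.sh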
(Diffs for the remaining 3 changed files are not shown.)
