Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[COST-5368] POC - Glue #5250

Draft
wants to merge 65 commits into
base: main
Choose a base branch
from
Draft
Changes from 1 commit
Commits
Show all changes
65 commits
Select commit Hold shift + click to select a range
1c241be
switch to glue
maskarb Aug 5, 2024
f737e00
poc file
maskarb Aug 6, 2024
228a224
add glue catalog
maskarb Aug 6, 2024
a9e28de
idk somethin
maskarb Aug 8, 2024
6e56c42
hive migration script
maskarb Aug 15, 2024
d0a98bc
some cleanup
maskarb Aug 15, 2024
2cb2979
some cleanup
maskarb Aug 15, 2024
0516869
Merge branch 'main' into glue2
maskarb Aug 15, 2024
47b526f
Merge branch 'main' into glue2
maskarb Sep 16, 2024
3ae3fb3
Merge branch 'main' into glue2
maskarb Oct 4, 2024
e54db9c
adding local config + glue clean make command
lcouzens Oct 11, 2024
56967fb
add migration script
maskarb Oct 14, 2024
972c4ce
remove polars
maskarb Oct 14, 2024
19458a0
Merge branch 'main' into glue2
maskarb Oct 16, 2024
1fa0aad
Merge branch 'main' into glue2
maskarb Oct 16, 2024
f7b57da
Merge branch 'main' into glue2
maskarb Oct 21, 2024
e585b8a
some updates
maskarb Oct 21, 2024
c8ba1ec
add suffix to sources org-id
maskarb Oct 22, 2024
8ca5972
fix test
maskarb Oct 22, 2024
a3b55ec
add env variable to docker compose
maskarb Oct 23, 2024
7c94ab9
attempt glue testing
maskarb Oct 25, 2024
176fd0e
change trino refs
maskarb Oct 25, 2024
bdc9d9e
whatever trino hash this is...
maskarb Oct 25, 2024
1a8cea3
add org-id-suffix
maskarb Oct 25, 2024
32875e5
try pointing to glue
maskarb Oct 25, 2024
dd52bba
pass profile name to session
maskarb Oct 25, 2024
169572e
undo pipfile
maskarb Dec 9, 2024
5653d45
Merge branch 'main' into glue2
maskarb Dec 9, 2024
d8bc7a3
reconfig config
maskarb Dec 11, 2024
1104e4d
clowdapp
maskarb Dec 11, 2024
71bf6e8
add suffix in sources flow
maskarb Dec 11, 2024
c7c0905
regex replace special characters with _
maskarb Dec 12, 2024
e169bcb
add org suffix to enabled_tags masu endpoint
maskarb Dec 12, 2024
aee0114
add the params
maskarb Dec 12, 2024
7ad09b5
lint
maskarb Dec 12, 2024
8d0d36a
Merge branch 'main' into glue2
maskarb Dec 16, 2024
c0214b5
changes for dev rds
maskarb Dec 17, 2024
444cd62
Merge branch 'main' into glue2
maskarb Jan 6, 2025
05e4192
Merge branch 'main' into glue2
maskarb Jan 8, 2025
1f009f9
move trino ref
maskarb Jan 8, 2025
97c88fe
remove koku-aws secret
maskarb Jan 8, 2025
216cb3d
fix bucket name
maskarb Jan 8, 2025
28c13de
fix bucket names
maskarb Jan 9, 2025
7b3a983
Merge branch 'main' into glue2
maskarb Jan 10, 2025
4744325
set hcs specific variables
maskarb Jan 10, 2025
b5c19a9
Merge branch 'main' into glue2
maskarb Jan 12, 2025
645301d
undo bucket changes for now
maskarb Jan 12, 2025
44820d5
add schema suffix to yet another masu endpoint
maskarb Jan 12, 2025
54cb190
change suffix to SCHEMA_SUFFIX
maskarb Jan 13, 2025
64972a9
simplify
maskarb Jan 13, 2025
42eda15
simplify
maskarb Jan 13, 2025
3aab9b4
simplify
maskarb Jan 13, 2025
c6da3e0
add back import
maskarb Jan 13, 2025
0cbcdda
Merge branch 'main' into glue2
maskarb Jan 13, 2025
f702c4d
remove temp stuff
maskarb Jan 13, 2025
a5a55b6
fix tests
maskarb Jan 13, 2025
5f4fec3
update cleanup script
maskarb Jan 14, 2025
b56a2ca
Merge branch 'main' into glue2
maskarb Jan 14, 2025
f72c314
Merge branch 'main' into glue2
maskarb Jan 15, 2025
573c9cb
add trino schema prefix to clowdapp
maskarb Jan 15, 2025
718f6f7
add schema prefix and s3a_or_s3
maskarb Jan 15, 2025
55c641c
update trino
maskarb Jan 16, 2025
94b14ab
test trino
maskarb Jan 16, 2025
32679b1
make clowdapp
maskarb Jan 16, 2025
8a537ea
Merge branch 'main' into glue2
maskarb Jan 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
try pointing to glue
  • Loading branch information
maskarb committed Oct 25, 2024
commit 32875e5d8b0efd9b9ff76a5fa521d65f10cd6aaf
2 changes: 1 addition & 1 deletion deploy/clowdapp.yaml
Original file line number Diff line number Diff line change
@@ -5710,7 +5710,7 @@ parameters:
name: ORG_ID_SUFFIX
value: ""
- name: S3_BUCKET_NAME
value: koku-eph-s3
value: mskarbek-glue
- name: S3_ROS_BUCKET_NAME
value: ros-eph-s3
- name: S3_SUBS_BUCKET_NAME
2 changes: 1 addition & 1 deletion deploy/kustomize/base/base.yaml
Original file line number Diff line number Diff line change
@@ -283,7 +283,7 @@ parameters:
description: DEVELOPMENT ONLY - used to prevent clashing Glue databases during ephemeral testing
value: ""
- name: S3_BUCKET_NAME
value: koku-eph-s3
value: mskarbek-glue
- name: S3_ROS_BUCKET_NAME
value: ros-eph-s3
- name: S3_SUBS_BUCKET_NAME
2 changes: 1 addition & 1 deletion koku/masu/celery/tasks.py
Original file line number Diff line number Diff line change
@@ -156,7 +156,7 @@ def deleted_archived_with_prefix(s3_bucket_name, prefix):
prefix (str): The prefix for deletion
"""
context = {"service_task": "purge_old_data"}
s3_resource = get_s3_resource(settings.S3_ACCESS_KEY, settings.S3_SECRET, settings.S3_REGION)
s3_resource = get_s3_resource(profile_name="default")
s3_bucket = s3_resource.Bucket(s3_bucket_name)
object_keys = [s3_object.key for s3_object in s3_bucket.objects.filter(Prefix=prefix)]
LOG.info(f"starting objects: {len(object_keys)}")
10 changes: 5 additions & 5 deletions koku/masu/util/aws/common.py
Original file line number Diff line number Diff line change
@@ -512,7 +512,7 @@ def get_bills_from_provider(
return bills


def get_s3_resource(access_key, secret_key, region): # pragma: no cover
def get_s3_resource(access_key=None, secret_key=None, region=None, profile_name=None): # pragma: no cover
"""
Obtain the s3 session client
"""
@@ -535,7 +535,7 @@ def copy_data_to_s3_bucket(request_id, path, filename, data, metadata=None, cont
extra_args = {}
if metadata:
extra_args["Metadata"] = metadata
s3_resource = get_s3_resource(settings.S3_ACCESS_KEY, settings.S3_SECRET, settings.S3_REGION)
s3_resource = get_s3_resource(profile_name="default")
s3_obj = {"bucket_name": settings.S3_BUCKET_NAME, "key": upload_key}
upload = s3_resource.Object(**s3_obj)
try:
@@ -578,7 +578,7 @@ def copy_local_hcs_report_file_to_s3_bucket(


def _get_s3_objects(s3_path):
s3_resource = get_s3_resource(settings.S3_ACCESS_KEY, settings.S3_SECRET, settings.S3_REGION)
s3_resource = get_s3_resource(profile_name="default")
return s3_resource.Bucket(settings.S3_BUCKET_NAME).objects.filter(Prefix=s3_path)


@@ -682,7 +682,7 @@ def filter_s3_objects_less_than(
if context is None:
context = {}

s3_resource = get_s3_resource(settings.S3_ACCESS_KEY, settings.S3_SECRET, settings.S3_REGION)
s3_resource = get_s3_resource(profile_name="default")

filtered = []
for key in keys:
@@ -794,7 +794,7 @@ def delete_s3_objects_not_matching_metadata(
def delete_s3_objects(request_id, keys_to_delete, context) -> list[str]:
keys_to_delete = [{"Key": key} for key in keys_to_delete]
LOG.info(log_json(request_id, msg="attempting to batch delete s3 files", context=context))
s3_resource = get_s3_resource(settings.S3_ACCESS_KEY, settings.S3_SECRET, settings.S3_REGION)
s3_resource = get_s3_resource(profile_name="default")
s3_bucket = s3_resource.Bucket(settings.S3_BUCKET_NAME)
try:
batch_size = 1000 # AWS S3 delete API limits to 1000 objects per request.
Loading