[COST-3617] - Fix GCP end of month crossover summary (#5080)
lcouzens authored May 7, 2024
1 parent 2dd7915 commit f6e7ebb
Showing 6 changed files with 30 additions and 15 deletions.
8 changes: 8 additions & 0 deletions koku/api/utils.py
@@ -448,6 +448,14 @@ def gcp_find_invoice_months_in_date_range(self, start, end):
invoice_months.append(invoice_month)
return invoice_months

def get_year_month_list_from_start_end(self, start, end):
    """Return a {"year", "month"} dict for each month in the start/end range."""
    if isinstance(start, datetime.date):
        start = datetime.datetime(start.year, start.month, start.day, tzinfo=settings.UTC)
    if isinstance(end, datetime.date):
        end = datetime.datetime(end.year, end.month, end.day, tzinfo=settings.UTC)
    dates = self.list_months(start, end)
    return [{"year": date.strftime("%Y"), "month": date.strftime("%m")} for date in dates]

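A rough usage sketch of the new helper (assuming list_months yields the first day of each month in the range, as the neighboring helpers suggest; the dates are illustrative):

from datetime import date
from api.utils import DateHelper

dh = DateHelper()
# A range that crosses a month boundary yields one dict per month touched.
dh.get_year_month_list_from_start_end(date(2024, 4, 28), date(2024, 5, 3))
# -> [{"year": "2024", "month": "04"}, {"year": "2024", "month": "05"}]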

def materialized_view_month_start(dh=DateHelper()):
"""Datetime of midnight on the first of the month where materialized summary starts."""
13 changes: 7 additions & 6 deletions koku/masu/database/gcp_report_db_accessor.py
@@ -21,6 +21,7 @@

from api.common import log_json
from api.provider.models import Provider
from api.utils import DateHelper
from koku.database import SQLScriptAtomicExecutorMixin
from masu.database import GCP_REPORT_TABLE_MAP
from masu.database import OCP_REPORT_TABLE_MAP
@@ -107,6 +108,7 @@ def populate_line_item_daily_summary_table_trino(
(None)
"""
date_dicts = DateHelper().get_year_month_list_from_start_end(start_date, end_date)
last_month_end = datetime.date.today().replace(day=1) - datetime.timedelta(days=1)
if end_date == last_month_end:

@@ -134,15 +136,14 @@ def populate_line_item_daily_summary_table_trino(
"schema": self.schema,
"table": TRINO_LINE_ITEM_TABLE,
"source_uuid": source_uuid,
"year": invoice_month_date.strftime("%Y"),
"month": invoice_month_date.strftime("%m"),
"markup": markup_value or 0,
"bill_id": bill_id,
}

self._execute_trino_raw_sql_query(
sql, sql_params=sql_params, log_ref="reporting_gcpcostentrylineitem_daily_summary.sql"
)
for date_dict in date_dicts:
sql_params = sql_params | {"year": date_dict["year"], "month": date_dict["month"]}
self._execute_trino_raw_sql_query(
sql, sql_params=sql_params, log_ref="reporting_gcpcostentrylineitem_daily_summary.sql"
)

def populate_tags_summary_table(self, bill_ids, start_date, end_date):
"""Populate the line item aggregated totals data table."""
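A minimal sketch of the fan-out this change introduces (the schema, table, and bill values below are invented; the dict-union operator requires Python 3.9+):

# Suppose the summary range crosses a month boundary:
date_dicts = [{"year": "2024", "month": "04"}, {"year": "2024", "month": "05"}]
base_params = {"schema": "org1234567", "table": "gcp_line_items", "source_uuid": "abc-123", "markup": 0, "bill_id": 1}
for date_dict in date_dicts:
    # Override year/month per iteration via dict union:
    sql_params = base_params | {"year": date_dict["year"], "month": date_dict["month"]}
    # One Trino execution per month partition touched by the date range,
    # so end-of-month crossover rows are summarized in both months.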
1 change: 0 additions & 1 deletion reporting_gcpcostentrylineitem_daily_summary.sql
@@ -63,7 +63,6 @@ CROSS JOIN
WHERE source = '{{source_uuid | sqlsafe}}'
AND year = '{{year | sqlsafe}}'
AND month = '{{month | sqlsafe}}'
AND invoice_month = '{{year | sqlsafe}}{{month | sqlsafe}}'
AND usage_start_time >= TIMESTAMP '{{start_date | sqlsafe}}'
AND usage_start_time < date_add('day', 1, TIMESTAMP '{{end_date | sqlsafe}}')
GROUP BY billing_account_id,
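The dropped invoice_month predicate is what excluded end-of-month crossover rows. A small illustration (values invented):

# A usage row from the last day of April that bills to May's invoice:
row = {"year": "2024", "month": "04", "invoice_month": "202405"}
# The old filter required invoice_month == year + month, so this row was skipped:
row["invoice_month"] == row["year"] + row["month"]  # False
# With the predicate removed, the row is summarized in its usage-month partition.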
12 changes: 7 additions & 5 deletions koku/masu/external/downloader/gcp/gcp_report_downloader.py
@@ -91,12 +91,14 @@ def create_daily_archives(
data_frame = pd_read_csv(local_file_path)
data_frame = add_label_columns(data_frame)
# putting it in a for loop handles crossover data when we have distinct invoice_month values
unique_usage_days = pd.to_datetime(data_frame["usage_start_time"]).dt.date.unique()
days = list({day.strftime("%Y-%m-%d") for day in unique_usage_days})
date_range = {"start": min(days), "end": max(days)}
for invoice_month in data_frame["invoice.month"].unique():
invoice_filter = data_frame["invoice.month"] == invoice_month
invoice_month_data = data_frame[invoice_filter]
unique_usage_days = pd.to_datetime(invoice_month_data["usage_start_time"]).dt.date.unique()
days = list({day.strftime("%Y-%m-%d") for day in unique_usage_days})
date_range = {"start": min(days), "end": max(days), "invoice_month": str(invoice_month)}
# We may be able to completely remove invoice month in the future
date_range["invoice_month"] = str(invoice_month)
partition_dates = invoice_month_data.partition_date.unique()
for partition_date in partition_dates:
partition_date_filter = invoice_month_data["partition_date"] == partition_date
@@ -129,8 +131,8 @@ def create_daily_archives(
tracing_id, s3_csv_path, day_filepath, day_file, manifest_id, context
)
daily_file_names.append(day_filepath)
except Exception:
msg = f"unable to create daily archives from: {local_file_paths}"
except Exception as e:
msg = f"unable to create daily archives from: {local_file_paths}. reason: {e}"
LOG.info(log_json(tracing_id, msg=msg, context=context))
raise CreateDailyArchivesError(msg)
return daily_file_names, date_range
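A small pandas sketch of the reordering (data invented): the overall date range is now computed from the whole file before splitting by invoice month, so a crossover file reports its full start and end dates.

import pandas as pd

df = pd.DataFrame({
    "usage_start_time": ["2024-04-29", "2024-04-30", "2024-05-01"],
    "invoice.month": ["202404", "202405", "202405"],
})
# Collect every usage day in the file, across all invoice months:
unique_usage_days = pd.to_datetime(df["usage_start_time"]).dt.date.unique()
days = [day.strftime("%Y-%m-%d") for day in unique_usage_days]
date_range = {"start": min(days), "end": max(days)}
# -> {'start': '2024-04-29', 'end': '2024-05-01'}, spanning both invoice months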
@@ -54,12 +54,14 @@ def create_daily_archives(tracing_id, account, provider_uuid, filename, filepath
LOG.error(f"File {filepath} could not be parsed. Reason: {str(error)}")
raise GCPReportDownloaderError(error)
# putting it in a for loop handles crossover data when we have distinct invoice_month values
unique_usage_days = pd.to_datetime(data_frame["usage_start_time"]).dt.date.unique()
days = list({day.strftime("%Y-%m-%d") for day in unique_usage_days})
date_range = {"start": min(days), "end": max(days)}
for invoice_month in data_frame["invoice.month"].unique():
invoice_filter = data_frame["invoice.month"] == invoice_month
invoice_month_data = data_frame[invoice_filter]
unique_usage_days = pd.to_datetime(invoice_month_data["usage_start_time"]).dt.date.unique()
days = list({day.strftime("%Y-%m-%d") for day in unique_usage_days})
date_range = {"start": min(days), "end": max(days), "invoice_month": str(invoice_month)}
# We may be able to completely remove invoice month in the future
date_range["invoice_month"] = str(invoice_month)
partition_dates = invoice_month_data.partition_date.unique()
for partition_date in partition_dates:
partition_date_filter = invoice_month_data["partition_date"] == partition_date
3 changes: 3 additions & 0 deletions koku/masu/processor/parquet/parquet_report_processor.py
@@ -476,6 +476,9 @@ def convert_to_parquet(self): # noqa: C901
daily_data_frames.extend(daily_frame)
if self.provider_type not in (Provider.PROVIDER_AZURE):
self.create_daily_parquet(parquet_base_filename, daily_frame)
if self.provider_type in [Provider.PROVIDER_GCP, Provider.PROVIDER_GCP_LOCAL]:
# Sync partitions on each file to create partitions that cross month boundaries
self.create_parquet_table(parquet_base_filename)
if not success:
msg = "failed to convert files to parquet"
LOG.warning(
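A toy illustration of why the per-file sync matters (data invented; create_parquet_table is assumed to register any new Hive partitions with Trino):

# A single GCP daily file at month end can land in two month partitions:
usage_days = ["2024-04-30", "2024-05-01"]
partitions = {(day[:4], day[5:7]) for day in usage_days}
print(partitions)  # {('2024', '04'), ('2024', '05')} (order may vary)
# Syncing after each file ensures the partition created by the crossover
# rows is registered before summary SQL queries it.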
