From 50f510e5edacaf1f0aa76515560ac3bf478ef9ac Mon Sep 17 00:00:00 2001 From: Luke Couzens Date: Tue, 17 Sep 2024 13:54:06 +0100 Subject: [PATCH] [COST-5133] - Clean up some unused columns (#5305) * [COST-5133] - Clean up some unused columns for Trino managed table OCP on Azure --- .../managed_azure_openshift_daily.sql | 272 +++++------------- .../azure/azure_report_parquet_processor.py | 4 +- koku/masu/util/azure/azure_post_processor.py | 4 - koku/masu/util/azure/common.py | 11 - koku/reporting/provider/azure/models.py | 29 +- 5 files changed, 76 insertions(+), 244 deletions(-) diff --git a/koku/masu/database/trino_sql/azure/openshift/managed_azure_openshift_daily.sql b/koku/masu/database/trino_sql/azure/openshift/managed_azure_openshift_daily.sql index 44b22fc4ca..106122e173 100644 --- a/koku/masu/database/trino_sql/azure/openshift/managed_azure_openshift_daily.sql +++ b/koku/masu/database/trino_sql/azure/openshift/managed_azure_openshift_daily.sql @@ -1,81 +1,39 @@ -- Now create our proper table if it does not exist CREATE TABLE IF NOT EXISTS hive.{{schema | sqlsafe}}.managed_azure_openshift_daily ( - invoicesectionname varchar, accountname varchar, - accountownerid varchar, - subscriptionguid varchar, - subscriptionname varchar, - resourcegroup varchar, - resourcelocation varchar, + additionalinfo varchar, + billingcurrency varchar, + billingcurrencycode varchar, + consumedservice varchar, + costinbillingcurrency double, date timestamp(3), + effectiveprice double, + frequency varchar, + isazurecrediteligible varchar, metercategory varchar, - metersubcategory varchar, - meterid varchar, metername varchar, - meterregion varchar, - unitofmeasure varchar, - quantity double, - effectiveprice double, - costinbillingcurrency double, - costcenter varchar, - consumedservice varchar, - tags varchar, - offerid varchar, - additionalinfo varchar, - serviceinfo1 varchar, - serviceinfo2 varchar, - resourcename varchar, - reservationid varchar, - reservationname varchar, - unitprice double, - productorderid varchar, - productordername varchar, - term varchar, - publishertype varchar, + metersubcategory varchar, + productname varchar, publishername varchar, - chargetype varchar, - frequency varchar, - pricingmodel varchar, - availabilityzone varchar, - billingaccountid varchar, - billingcurrencycode varchar, - billingaccountname varchar, - billingperiodstartdate timestamp(3), - billingperiodenddate timestamp(3), - billingprofileid varchar, - billingprofilename varchar, + publishertype varchar, + quantity double, + resourcegroup varchar, resourceid varchar, - invoicesectionid varchar, - isazurecrediteligible varchar, - partnumber varchar, - marketprice varchar, - planname varchar, + resourcelocation varchar, + resourcetype varchar, servicefamily varchar, - invoiceid varchar, - previousinvoiceid varchar, - resellername varchar, - resellermpnid varchar, - serviceperiodenddate varchar, - serviceperiodstartdate varchar, - productname varchar, - productid varchar, - publisherid varchar, - location varchar, - pricingcurrencycode varchar, - costinpricingcurrency varchar, - costinusd varchar, - paygcostinbillingcurrency varchar, - paygcostinusd varchar, - exchangerate varchar, - exchangeratedate varchar, - billingcurrency varchar, + serviceinfo1 varchar, + serviceinfo2 varchar, servicename varchar, - resourcetype varchar, - subscriptionid varchar, servicetier varchar, - paygprice double, - resourcerate double, + subscriptionguid varchar, + subscriptionid varchar, + subscriptionname varchar, + tags varchar, + term varchar, + unitofmeasure varchar, + unitprice double, resource_id_matched boolean, matched_tag varchar, source varchar, @@ -88,81 +46,39 @@ CREATE TABLE IF NOT EXISTS hive.{{schema | sqlsafe}}.managed_azure_openshift_dai -- Direct resource matching INSERT INTO hive.{{schema | sqlsafe}}.managed_azure_openshift_daily ( - invoicesectionname, accountname, - accountownerid, - subscriptionguid, - subscriptionname, - resourcegroup, - resourcelocation, + additionalinfo, + billingcurrency, + billingcurrencycode, + consumedservice, + costinbillingcurrency, date, + effectiveprice, + frequency, + isazurecrediteligible, metercategory, - metersubcategory, - meterid, metername, - meterregion, - unitofmeasure, - quantity, - effectiveprice, - costinbillingcurrency, - costcenter, - consumedservice, - tags, - offerid, - additionalinfo, - serviceinfo1, - serviceinfo2, - resourcename, - reservationid, - reservationname, - unitprice, - productorderid, - productordername, - term, - publishertype, + metersubcategory, + productname, publishername, - chargetype, - frequency, - pricingmodel, - availabilityzone, - billingaccountid, - billingcurrencycode, - billingaccountname, - billingperiodstartdate, - billingperiodenddate, - billingprofileid, - billingprofilename, + publishertype, + quantity, + resourcegroup, resourceid, - invoicesectionid, - isazurecrediteligible, - partnumber, - marketprice, - planname, + resourcelocation, + resourcetype, servicefamily, - invoiceid, - previousinvoiceid, - resellername, - resellermpnid, - serviceperiodenddate, - serviceperiodstartdate, - productname, - productid, - publisherid, - location, - pricingcurrencycode, - costinpricingcurrency, - costinusd, - paygcostinbillingcurrency, - paygcostinusd, - exchangerate, - exchangeratedate, - billingcurrency, + serviceinfo1, + serviceinfo2, servicename, - resourcetype, - subscriptionid, servicetier, - paygprice, - resourcerate, + subscriptionguid, + subscriptionid, + subscriptionname, + tags, + term, + unitofmeasure, + unitprice, resource_id_matched, matched_tag, source, @@ -225,81 +141,39 @@ cte_tag_matches AS ( cte_agg_tags AS ( SELECT array_agg(matched_tag) as matched_tags from cte_tag_matches ) -SELECT azure.invoicesectionname, - azure.accountname, - azure.accountownerid, - azure.subscriptionguid, - azure.subscriptionname, - azure.resourcegroup, - azure.resourcelocation, +SELECT azure.accountname, + azure.additionalinfo, + azure.billingcurrency, + azure.billingcurrencycode, + azure.consumedservice, + azure.costinbillingcurrency, azure.date, + azure.effectiveprice, + azure.frequency, + azure.isazurecrediteligible, azure.metercategory, - azure.metersubcategory, - azure.meterid, azure.metername, - azure.meterregion, - azure.unitofmeasure, - azure.quantity, - azure.effectiveprice, - azure.costinbillingcurrency, - azure.costcenter, - azure.consumedservice, - azure.tags, - azure.offerid, - azure.additionalinfo, - azure.serviceinfo1, - azure.serviceinfo2, - azure.resourcename, - azure.reservationid, - azure.reservationname, - azure.unitprice, - azure.productorderid, - azure.productordername, - azure.term, - azure.publishertype, + azure.metersubcategory, + azure.productname, azure.publishername, - azure.chargetype, - azure.frequency, - azure.pricingmodel, - azure.availabilityzone, - azure.billingaccountid, - azure.billingcurrencycode, - azure.billingaccountname, - azure.billingperiodstartdate, - azure.billingperiodenddate, - azure.billingprofileid, - azure.billingprofilename, + azure.publishertype, + azure.quantity, + azure.resourcegroup, azure.resourceid, - azure.invoicesectionid, - azure.isazurecrediteligible, - azure.partnumber, - azure.marketprice, - azure.planname, + azure.resourcelocation, + azure.resourcetype, azure.servicefamily, - azure.invoiceid, - azure.previousinvoiceid, - azure.resellername, - azure.resellermpnid, - azure.serviceperiodenddate, - azure.serviceperiodstartdate, - azure.productname, - azure.productid, - azure.publisherid, - azure.location, - azure.pricingcurrencycode, - azure.costinpricingcurrency, - azure.costinusd, - azure.paygcostinbillingcurrency, - azure.paygcostinusd, - azure.exchangerate, - azure.exchangeratedate, - azure.billingcurrency, + azure.serviceinfo1, + azure.serviceinfo2, azure.servicename, - azure.resourcetype, - azure.subscriptionid, azure.servicetier, - azure.paygprice, - azure.resourcerate, + azure.subscriptionguid, + azure.subscriptionid, + azure.subscriptionname, + azure.tags, + azure.term, + azure.unitofmeasure, + azure.unitprice, CASE WHEN resource_names.resourceid IS NOT NULL THEN TRUE ELSE FALSE diff --git a/koku/masu/processor/azure/azure_report_parquet_processor.py b/koku/masu/processor/azure/azure_report_parquet_processor.py index d4c0a096f2..9b823c7a16 100644 --- a/koku/masu/processor/azure/azure_report_parquet_processor.py +++ b/koku/masu/processor/azure/azure_report_parquet_processor.py @@ -19,13 +19,11 @@ class AzureReportParquetProcessor(ReportParquetProcessorBase): def __init__(self, manifest_id, account, s3_path, provider_uuid, parquet_local_path): numeric_columns = [ "quantity", - "resourcerate", "costinbillingcurrency", "effectiveprice", "unitprice", - "paygprice", ] - date_columns = ["date", "billingperiodstartdate", "billingperiodenddate"] + date_columns = ["date"] boolean_columns = ["resource_id_matched"] column_types = { "numeric_columns": numeric_columns, diff --git a/koku/masu/util/azure/azure_post_processor.py b/koku/masu/util/azure/azure_post_processor.py index 449b1651d8..a3d11c1e1d 100644 --- a/koku/masu/util/azure/azure_post_processor.py +++ b/koku/masu/util/azure/azure_post_processor.py @@ -79,14 +79,10 @@ def get_column_converters(self, col_names, panda_kwargs): """ converters = { "date": azure_date_converter, - "billingperiodstartdate": azure_date_converter, - "billingperiodenddate": azure_date_converter, "quantity": safe_float, - "resourcerate": safe_float, "costinbillingcurrency": safe_float, "effectiveprice": safe_float, "unitprice": safe_float, - "paygprice": safe_float, "tags": azure_json_converter, "additionalinfo": azure_json_converter, } diff --git a/koku/masu/util/azure/common.py b/koku/masu/util/azure/common.py index 8b6ecd9750..3ce71b0fa3 100644 --- a/koku/masu/util/azure/common.py +++ b/koku/masu/util/azure/common.py @@ -21,30 +21,19 @@ INGRESS_REQUIRED_COLUMNS = { "additionalinfo", - "billingaccountid", - "billingaccountname", - "billingperiodenddate", - "billingperiodstartdate", - "chargetype", "consumedservice", "costinbillingcurrency", "date", "effectiveprice", "metercategory", - "meterid", "metername", - "meterregion", "metersubcategory", - "offerid", "productname", "publishername", "publishertype", "quantity", - "reservationid", - "reservationname", "resourceid", "resourcelocation", - "resourcename", "servicefamily", "serviceinfo1", "serviceinfo2", diff --git a/koku/reporting/provider/azure/models.py b/koku/reporting/provider/azure/models.py index a0d026226f..178606faa5 100644 --- a/koku/reporting/provider/azure/models.py +++ b/koku/reporting/provider/azure/models.py @@ -16,51 +16,26 @@ TRINO_MANAGED_OCP_AZURE_DAILY_TABLE = "managed_azure_openshift_daily" TRINO_REQUIRED_COLUMNS = { - "billingperiodstartdate": pd.NaT, - "billingperiodenddate": pd.NaT, - "date": pd.NaT, "accountname": "", - "accountownerid": "", "additionalinfo": "", - "availabilityzone": "", - "billingaccountid": "", - "billingaccountname": "", - "billingcurrencycode": "", "billingcurrency": "", - "billingprofileid": "", - "billingprofilename": "", - "chargetype": "", + "billingcurrencycode": "", "consumedservice": "", - "costcenter": "", "costinbillingcurrency": 0.0, + "date": pd.NaT, "effectiveprice": 0.0, "frequency": "", - "invoicesectionid": "", - "invoicesectionname": "", "isazurecrediteligible": "", "metercategory": "", - "meterid": "", "metername": "", - "meterregion": "", "metersubcategory": "", - "offerid": "", - "partnumber": "", - "paygprice": 0.0, - "planname": "", - "pricingmodel": "", "productname": "", - "productorderid": "", - "productordername": "", "publishername": "", "publishertype": "", "quantity": 0.0, - "reservationid": "", - "reservationname": "", "resourcegroup": "", "resourceid": "", "resourcelocation": "", - "resourcename": "", - "resourcerate": 0.0, "resourcetype": "", "servicefamily": "", "serviceinfo1": "",