Skip to content

Commit

Permalink
re-run covariate balance and upload results
Browse files Browse the repository at this point in the history
  • Loading branch information
Admin_FBu2 authored and Admin_FBu2 committed Dec 19, 2023
1 parent d83b233 commit c4c35e8
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 77 deletions.
33 changes: 33 additions & 0 deletions extra/CodeToRunAllDrugs-OpenClaims.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,36 @@ execute(connectionDetails = connectionDetails,
computeCovariateBalance = TRUE,
exportToCsv = TRUE,
maxCores = maxCores)


# ## **RUN THIS ONLY IF NECESSARY!**
# ## re-run the covariate balance computation step
# ## first delete all files under "drug/balance",
# ## OR rename the "drug/balance" folder to something else
# newOutputFolder1 = file.path(paste0(outputFolder, "-1"))
# exportSettings = LegendT2dm:::createExportSettings(exportAnalysisInfo = FALSE,
# exportStudyResults = FALSE,
# exportStudyDiagnostics = TRUE,
# exportDateTimeInfo = FALSE,
# exportBalanceOnly = TRUE)
# execute(connectionDetails = connectionDetails,
# cdmDatabaseSchema = cdmDatabaseSchema,
# oracleTempSchema = oracleTempSchema,
# cohortDatabaseSchema = cohortDatabaseSchema,
# outputFolder = newOutputFolder1,
# indicationId = indicationId,
# databaseId = databaseId,
# databaseName = databaseName,
# databaseDescription = databaseDescription,
# tablePrefix = tablePrefix,
# createExposureCohorts = FALSE,
# createOutcomeCohorts = FALSE,
# createPairedExposureSummary = FALSE, # not re-create exposure summary file
# fetchAllDataFromServer = FALSE,
# generateAllCohortMethodDataObjects = FALSE,
# runCohortMethod = FALSE,
# computeCovariateBalance = FALSE,
# exportToCsv = TRUE,
# exportSettings = exportSettings,
# maxCores = maxCores)

84 changes: 45 additions & 39 deletions extra/CodeToRunRedShift.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Sys.setenv(DATABASECONNECTOR_JAR_FOLDER="d:/Drivers")
options(andromedaTempFolder = "E:/andromedaTemp")
oracleTempSchema <- NULL

# # Feb 2023: fast forward data version to the latest accessible
# Feb 2023: fast forward data version to the latest accessible
# cdmDatabaseSchema <- "cdm_truven_ccae_v2182"
# serverSuffix <- "truven_ccae"
# cohortDatabaseSchema <- "scratch_fbu2"
Expand All @@ -18,14 +18,14 @@ oracleTempSchema <- NULL
# outputFolder <- "E:/LegendT2dmOutput_ccae_drug2"

# # Feb 2023: fast forward data version to the latest accessible
cdmDatabaseSchema <- "cdm_optum_ehr_v2247" #v2137
serverSuffix <- "optum_ehr"
cohortDatabaseSchema <- "scratch_fbu2"
databaseId <- "OptumEHR"
databaseName <- "Optum© de-identified Electronic Health Record Dataset"
databaseDescription <- "Optum© de-identified Electronic Health Record Dataset represents Humedica’s Electronic Health Record data a medical records database. The medical record data includes clinical information, inclusive of prescriptions as prescribed and administered, lab results, vital signs, body measurements, diagnoses, procedures, and information derived from clinical Notes using Natural Language Processing (NLP)."
tablePrefix <- "legend_t2dm_optum_ehr"
outputFolder <- "E:/LegendT2dmOutput_optum_ehr_drug2"
# cdmDatabaseSchema <- "cdm_optum_ehr_v2247" #v2137
# serverSuffix <- "optum_ehr"
# cohortDatabaseSchema <- "scratch_fbu2"
# databaseId <- "OptumEHR"
# databaseName <- "Optum© de-identified Electronic Health Record Dataset"
# databaseDescription <- "Optum© de-identified Electronic Health Record Dataset represents Humedica’s Electronic Health Record data a medical records database. The medical record data includes clinical information, inclusive of prescriptions as prescribed and administered, lab results, vital signs, body measurements, diagnoses, procedures, and information derived from clinical Notes using Natural Language Processing (NLP)."
# tablePrefix <- "legend_t2dm_optum_ehr"
# outputFolder <- "E:/LegendT2dmOutput_optum_ehr_drug2"

# Feb 2023: fast forward data version to the latest accessible
# cdmDatabaseSchema <- "cdm_truven_mdcr_v2322" #v2183
Expand All @@ -39,14 +39,14 @@ outputFolder <- "E:/LegendT2dmOutput_optum_ehr_drug2"

# # Feb 2023: fast forward data version to the latest accessible
# TBD: run drug-level study on MDCD
# cdmDatabaseSchema <- "cdm_truven_mdcd_v2321" #v2128
# serverSuffix <- "truven_mdcd"
# cohortDatabaseSchema <- "scratch_fbu2"
# databaseId<- "MDCD"
# databaseName <- "IBM Health MarketScan® Multi-State Medicaid Database"
# databaseDescription <- "IBM MarketScan® Multi-State Medicaid Database (MDCD) adjudicated US health insurance claims for Medicaid enrollees from multiple states and includes hospital discharge diagnoses, outpatient diagnoses and procedures, and outpatient pharmacy claims as well as ethnicity and Medicare eligibility. Members maintain their same identifier even if they leave the system for a brief period however the dataset lacks lab data."
# tablePrefix <- "legend_t2dm_mdcd"
# outputFolder <- "E:/LegendT2dmOutput_mdcd_drug2"
# Data source settings for the MDCD run.

# Server and schema identifiers
serverSuffix <- "truven_mdcd"
cdmDatabaseSchema <- "cdm_truven_mdcd_v2321" # v2128
cohortDatabaseSchema <- "scratch_fbu2"
tablePrefix <- "legend_t2dm_mdcd"

# Labels attached to the exported results
databaseId <- "MDCD"
databaseName <- "IBM Health MarketScan® Multi-State Medicaid Database"
databaseDescription <- "IBM MarketScan® Multi-State Medicaid Database (MDCD) adjudicated US health insurance claims for Medicaid enrollees from multiple states and includes hospital discharge diagnoses, outpatient diagnoses and procedures, and outpatient pharmacy claims as well as ethnicity and Medicare eligibility. Members maintain their same identifier even if they leave the system for a brief period however the dataset lacks lab data."

# Local folder that receives the study output
outputFolder <- "E:/LegendT2dmOutput_mdcd_drug2"

# # Feb 2023: fast forward data version to the latest accessible
cdmDatabaseSchema <- "cdm_optum_extended_dod_v2323" #v2228 #v2134
Expand Down Expand Up @@ -159,28 +159,34 @@ execute(connectionDetails = conn,
exportToCsv = TRUE,
maxCores = 10)

# # try re-packaging OptumEHR result files
# execute(
# connectionDetails = conn,
# cdmDatabaseSchema = cdmDatabaseSchema,
# oracleTempSchema = oracleTempSchema,
# cohortDatabaseSchema = cohortDatabaseSchema,
# outputFolder = outputFolder,
# indicationId = "drug",
# databaseId = databaseId,
# databaseName = databaseName,
# databaseDescription = databaseDescription,
# tablePrefix = tablePrefix,
# createExposureCohorts = FALSE,
# createOutcomeCohorts = FALSE,
# fetchAllDataFromServer = FALSE,
# generateAllCohortMethodDataObjects = FALSE,
# runCohortMethod = FALSE,
# runSections = c(1:6),
# computeCovariateBalance = FALSE,
# exportToCsv = TRUE,
# maxCores = 4
# )
# Re-run the covariate balance computation and export the results.
# Every upstream step (cohort creation, data fetch, CohortMethod data objects,
# CohortMethod itself) is switched off below, so only the balance step and
# the CSV export are requested.
# NOTE(review): presumably relies on earlier outputs already present in
# outputFolder -- confirm before running on a fresh folder.

# Export settings: study diagnostics only, restricted to balance output.
# createExportSettings is reached via `:::`, i.e. treated as a
# package-internal helper of LegendT2dm.
exportSettings <- LegendT2dm:::createExportSettings(
  exportAnalysisInfo = FALSE,
  exportStudyResults = FALSE,
  exportStudyDiagnostics = TRUE,
  exportDateTimeInfo = FALSE,
  exportBalanceOnly = TRUE
)

execute(
  connectionDetails = conn,
  cdmDatabaseSchema = cdmDatabaseSchema,
  oracleTempSchema = oracleTempSchema,
  cohortDatabaseSchema = cohortDatabaseSchema,
  outputFolder = outputFolder,
  indicationId = "drug",
  databaseId = databaseId,
  databaseName = databaseName,
  databaseDescription = databaseDescription,
  tablePrefix = tablePrefix,
  createExposureCohorts = FALSE,
  createOutcomeCohorts = FALSE,
  fetchAllDataFromServer = FALSE,
  generateAllCohortMethodDataObjects = FALSE,
  runCohortMethod = FALSE,
  runSections = 1:6,
  computeCovariateBalance = TRUE,
  exportToCsv = TRUE,
  exportSettings = exportSettings,
  maxCores = 16
)


#### test staged execution code on a big JnJ data source ----
Expand Down
90 changes: 52 additions & 38 deletions extra/addBalanceColumnsOnResultsDatabase.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
schema = "legendt2dm_drug_results"
#schema = "legendt2dm_class_results"

Sys.setenv(DATABASECONNECTOR_JAR_FOLDER="d:/Drivers")

connectionDetails <- DatabaseConnector::createConnectionDetails(
dbms = "postgresql",
server = paste(keyring::key_get("ohdsiPostgresServer"),
Expand All @@ -15,45 +17,57 @@ connection = DatabaseConnector::connect(connectionDetails)


##### add the additional balance columns to covariate_balance on the results schema
# Point the session at the results schema so the unqualified table name in
# the ALTER TABLE statement below resolves there.
DatabaseConnector::executeSql(
  connection,
  sprintf("SET search_path TO %s;", schema),
  progressBar = FALSE,
  reportOverallTime = FALSE
)

# Add the extra balance columns to covariate_balance.
# IF NOT EXISTS makes this migration safe to re-run: a plain ADD COLUMN
# raises an error in PostgreSQL when the column is already present, which
# would abort the script on a second execution.
sql <- "ALTER TABLE covariate_balance
ADD COLUMN IF NOT EXISTS interaction_covariate_id BIGINT,
ADD COLUMN IF NOT EXISTS target_sd_before NUMERIC,
ADD COLUMN IF NOT EXISTS comparator_sd_before NUMERIC,
ADD COLUMN IF NOT EXISTS mean_before NUMERIC,
ADD COLUMN IF NOT EXISTS sd_before NUMERIC,
ADD COLUMN IF NOT EXISTS target_sd_after NUMERIC,
ADD COLUMN IF NOT EXISTS comparator_sd_after NUMERIC,
ADD COLUMN IF NOT EXISTS mean_after NUMERIC,
ADD COLUMN IF NOT EXISTS sd_after NUMERIC,
ADD COLUMN IF NOT EXISTS target_sum_before NUMERIC,
ADD COLUMN IF NOT EXISTS comparator_sum_before NUMERIC,
ADD COLUMN IF NOT EXISTS target_sum_after NUMERIC,
ADD COLUMN IF NOT EXISTS comparator_sum_after NUMERIC;"

DatabaseConnector::executeSql(connection, sql)
# # only run this once!!
# DatabaseConnector::executeSql(
# connection,
# sprintf("SET search_path TO %s;", schema),
# progressBar = FALSE,
# reportOverallTime = FALSE
# )

# sql <- "ALTER TABLE covariate_balance
# ADD COLUMN interaction_covariate_id BIGINT ,
# ADD COLUMN target_sd_before NUMERIC ,
# ADD COLUMN comparator_sd_before NUMERIC ,
# ADD COLUMN mean_before NUMERIC ,
# ADD COLUMN sd_before NUMERIC ,
# ADD COLUMN target_sd_after NUMERIC ,
# ADD COLUMN comparator_sd_after NUMERIC ,
# ADD COLUMN mean_after NUMERIC ,
# ADD COLUMN sd_after NUMERIC ,
# ADD COLUMN target_sum_before NUMERIC ,
# ADD COLUMN comparator_sum_before NUMERIC ,
# ADD COLUMN target_sum_after NUMERIC ,
# ADD COLUMN comparator_sum_after NUMERIC;"

# DatabaseConnector::executeSql(connection, sql)

DatabaseConnector::disconnect(connection)

## test it by uploading one table
# NOTE(review): "rrr" looks like a placeholder -- point this at a real output
# folder before running.
outputFolder <- "rrr"
balanceExportPath <- file.path(outputFolder, "drug", "export", "covariate_balance.csv")
balance <- readr::read_csv(balanceExportPath)

# Convert the column names of the data being uploaded to snake_case. The
# names must come from `balance` itself; the earlier code read them from
# `diagnostics`, which is not defined anywhere in the visible script.
names(balance) <- SqlRender::camelCaseToSnakeCase(names(balance))

# The connection opened earlier in the script has already been disconnected,
# so open a fresh one for the insert and always close it afterwards.
connection <- DatabaseConnector::connect(connectionDetails)
tryCatch(
  DatabaseConnector::insertTable(
    connection = connection,
    tableName = paste(schema, "covariate_balance", sep = "."),
    data = balance,
    dropTableIfExists = FALSE, # append to the existing results table
    createTable = FALSE,
    tempTable = FALSE,
    progressBar = TRUE
  ),
  finally = DatabaseConnector::disconnect(connection)
)
outputFolder = "E:/LegendT2dmOutput_optum_ehr_drug2"
#balanceExportPath = file.path(outputFolder, "drug", "export", "covariate_balance.csv")
#balance = readr::read_csv(balanceExportPath)

#names(balance) = SqlRender::camelCaseToSnakeCase(names(balance))

# DatabaseConnector::insertTable(
# connection = connection,
# tableName = paste(schema, "covariate_balance", sep = "."),
# data = balance,
# dropTableIfExists = FALSE,
# createTable = FALSE,
# tempTable = FALSE,
# progressBar = TRUE
# )

## test upload
# Upload the exported covariate balance CSV to the results schema.
outputFolder <- "E:/LegendT2dmOutput_optum_dod_drug2"
exportFolder <- file.path(outputFolder, "drug", "export")

# Tables to upload. The variable was previously misspelled (`tablesNames`)
# and never used; the call below now reads from it, so extending this
# vector is enough to upload more tables.
tableNames <- c("covariate_balance")

# NOTE(review): the specifications path is relative, so it is resolved
# against the current working directory -- presumably the package root;
# confirm before running.
uploadResultsToDatabaseFromCsv(
  connectionDetails = connectionDetails,
  schema = schema,
  exportFolder = exportFolder,
  tableNames = tableNames,
  specifications = readr::read_csv("inst/settings/ResultsModelSpecs1.csv")
)

0 comments on commit c4c35e8

Please sign in to comment.