From 8f1f84977e0bf9658ea9d97e8ace039e441d2803 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 29 Apr 2021 09:28:20 +0200 Subject: [PATCH 01/63] Draft dm_meta() --- R/learn.R | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/R/learn.R b/R/learn.R index f8700ab33..d0b7c7ce5 100644 --- a/R/learn.R +++ b/R/learn.R @@ -47,6 +47,109 @@ dm_learn_from_db <- function(dest, dbname = NULL, ...) { return() } + if (!is_mssql(con)) { + return(dm_learn_from_db_legacy(dest, dbname, ...)) + } + + dm_learn_from_db_meta(dest, catalog = dbname, ...) +} + +dm_learn_from_db_meta <- function(dest, catalog = NULL, schema = NULL) { + info <- dm_meta(con, catalog = dbname, ) + info +} + +dm_meta <- function(con, catalog = NULL, schema = NULL) { + schemata <- tbl_lc(con, dbplyr::ident_q("information_schema.schemata")) + tables <- tbl_lc(con, dbplyr::ident_q("information_schema.tables")) + views <- tbl_lc(con, dbplyr::ident_q("information_schema.views")) + columns <- tbl_lc(con, dbplyr::ident_q("information_schema.columns")) + table_constraints <- tbl_lc(con, dbplyr::ident_q("information_schema.table_constraints")) + referential_constraints <- tbl_lc(con, dbplyr::ident_q("information_schema.referential_constraints")) + key_column_usage <- tbl_lc(con, dbplyr::ident_q("information_schema.key_column_usage")) + + # not on mariadb: + constraint_column_usage <- tbl_lc(con, dbplyr::ident_q("information_schema.constraint_column_usage")) + + if (!is.null(catalog)) { + schemata <- schemata %>% filter(catalog_name %in% !!catalog) + tables <- tables %>% filter(table_catalog %in% !!catalog) + views <- views %>% filter(table_catalog %in% !!catalog) + columns <- columns %>% filter(table_catalog %in% !!catalog) + table_constraints <- table_constraints %>% filter(table_catalog %in% !!catalog) + referential_constraints <- referential_constraints %>% filter(table_catalog %in% !!catalog) + key_column_usage <- key_column_usage 
%>% filter(table_catalog %in% !!catalog) + constraint_column_usage <- constraint_column_usage %>% filter(table_catalog %in% !!catalog) + } + + if (!is.null(schema)) { + schemata <- schemata %>% filter(schema_name %in% !!catalog) + tables <- tables %>% filter(table_schema %in% !!schema) + views <- views %>% filter(table_schema %in% !!schema) + columns <- columns %>% filter(table_schema %in% !!schema) + table_constraints <- table_constraints %>% filter(table_schema %in% !!schema) + referential_constraints <- referential_constraints %>% filter(table_schema %in% !!schema) + key_column_usage <- key_column_usage %>% filter(table_schema %in% !!schema) + constraint_column_usage <- constraint_column_usage %>% filter(table_schema %in% !!schema) + } + + info_raw <- dm(schemata, tables, columns, views, table_constraints, referential_constraints, key_column_usage, constraint_column_usage) + info_raw + + info <- + info_raw %>% + dm_add_pk(schemata, c(catalog_name, schema_name)) %>% + dm_add_pk(tables, c(table_catalog, table_schema, table_name)) %>% + dm_add_fk(tables, c(table_catalog, table_schema), schemata) %>% + dm_add_pk(columns, c(table_catalog, table_schema, table_name, column_name)) %>% + dm_add_fk(columns, c(table_catalog, table_schema, table_name), tables) %>% + dm_add_fk(views, c(table_catalog, table_schema, table_name), tables) %>% + #dm_add_fk(table_constraints, table_schema, schemata) %>% + dm_add_pk(table_constraints, c(constraint_catalog, constraint_schema, constraint_name)) %>% + dm_add_fk(table_constraints, c(table_catalog, table_schema, table_name), tables) %>% + # constraint_schema vs. table_schema? + + # do we even need this? 
+ dm_add_pk(referential_constraints, c(constraint_catalog, constraint_schema, constraint_name)) %>% + dm_add_fk(referential_constraints, c(constraint_catalog, constraint_schema), schemata) %>% + dm_select(referential_constraints, constraint_catalog, constraint_schema, constraint_name, everything()) %>% + + # not on mssql: + #dm_add_fk(referential_constraints, c(constraint_schema, table_name), tables) %>% + #dm_add_fk(referential_constraints, c(constraint_schema, referenced_table_name), tables) %>% + + dm_add_fk(key_column_usage, c(table_catalog, table_schema, table_name, column_name), columns) %>% + + # not on mssql: + #dm_add_fk(key_column_usage, c(referenced_table_schema, referenced_table_name, referenced_column_name), columns) %>% + + dm_add_fk(key_column_usage, c(constraint_catalog, constraint_schema, constraint_name), table_constraints) %>% + + # not on mariadb; + dm_add_fk(constraint_column_usage, c(table_catalog, table_schema, table_name, column_name), columns) %>% + + # not on mssql: + #dm_add_fk(constraint_column_usage, c(referenced_table_schema, referenced_table_name, referenced_column_name), columns) %>% + + # not on mariadb: + dm_add_fk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name), table_constraints) %>% + + dm_set_colors(brown = c(tables, columns, views), blue = schemata, green4 = ends_with("_constraints"), orange = ends_with("_usage")) + + info +} + +tbl_lc <- function(con, name) { + out <- tbl(con, name) + names <- colnames(out) + names_lc <- tolower(names) + if (all(names == names_lc)) { + return(out) + } + out %>% rename(!!!set_names(syms(names), names_lc)) +} + +dm_learn_from_db_legacy <- function(dest, dbname, ...) { sql <- db_learn_query(con, dbname = dbname, ...) 
if (is.null(sql)) { return() From 2ae1a065e780a8ab5a066ca74caf1e74b704c45b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 29 Apr 2021 09:28:59 +0200 Subject: [PATCH 02/63] Show <- again in rigg() --- R/zzz.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/zzz.R b/R/zzz.R index 0f8a525af..d8d84f1a4 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -22,5 +22,5 @@ rigg <- function(fun) { rig <- get("rig", asNamespace("boomer"), mode = "function") - assign(name, rig(fun, ignore = c("~", "{", "(", "<-", "<<-")), getNamespace("dm")) + assign(name, rig(fun, ignore = c("~", "{", "(")), getNamespace("dm")) } From e56ee25d160b7266cfc339724d41347ff90f0dac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 29 Apr 2021 09:29:43 +0200 Subject: [PATCH 03/63] Copy from other branch --- scratch/info.R | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 scratch/info.R diff --git a/scratch/info.R b/scratch/info.R new file mode 100644 index 000000000..fe2124173 --- /dev/null +++ b/scratch/info.R @@ -0,0 +1,54 @@ +library(tidyverse) +pkgload::load_all() + +#src <- dm::dm_get_src(dm_financial()) +#src <- test_src_maria() +#src <- test_src_postgres() +#src <- test_src_mssql() + +# DuckDB doesn't have references +#src <- test_src_duckdb() + +con <- src$con + +if (FALSE) { + try(DBI::dbRemoveTable(con, "airlines")) + try(DBI::dbRemoveTable(con, "airports")) + try(DBI::dbRemoveTable(con, "planes")) + try(DBI::dbRemoveTable(con, "flights")) + try(DBI::dbRemoveTable(con, "weather")) + dm_nycflights13() %>% + dm_zoom_to(flights) %>% + semi_join(planes) %>% + dm_update_zoomed() %>% + copy_dm_to(con, ., temporary = FALSE) +} + +DBI::dbListTables(con) + +obj <- DBI::dbListObjects(con) +obj %>% filter(is_prefix) + +DBI::dbListObjects(con, DBI::Id(schema = "INFORMATION_SCHEMA")) + +info <- + dm_meta(con) + +info %>% + dm_draw() + +info_local <- + info %>% + collect() + +info_local %>% 
+ dm_nrow() + +info_local$TABLE_CONSTRAINTS %>% + filter(TABLE_SCHEMA == "Financial_ijs") + +info_local$TABLE_CONSTRAINTS %>% + count(CONSTRAINT_TYPE) + +info_local$REFERENTIAL_CONSTRAINTS %>% + filter(CONSTRAINT_SCHEMA == "Financial_ijs") From 5682b272d9d269c0285c06c2b7108fab15c39bf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 29 Apr 2021 09:47:54 +0200 Subject: [PATCH 04/63] Tweak --- R/learn.R | 4 ++-- scratch/info.R | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/R/learn.R b/R/learn.R index d0b7c7ce5..05e2fb85e 100644 --- a/R/learn.R +++ b/R/learn.R @@ -55,7 +55,7 @@ dm_learn_from_db <- function(dest, dbname = NULL, ...) { } dm_learn_from_db_meta <- function(dest, catalog = NULL, schema = NULL) { - info <- dm_meta(con, catalog = dbname, ) + info <- dm_meta(con, catalog = dbname, schema = schema) info } @@ -83,7 +83,7 @@ dm_meta <- function(con, catalog = NULL, schema = NULL) { } if (!is.null(schema)) { - schemata <- schemata %>% filter(schema_name %in% !!catalog) + schemata <- schemata %>% filter(schema_name %in% !!schema) tables <- tables %>% filter(table_schema %in% !!schema) views <- views %>% filter(table_schema %in% !!schema) columns <- columns %>% filter(table_schema %in% !!schema) diff --git a/scratch/info.R b/scratch/info.R index fe2124173..67ff0884c 100644 --- a/scratch/info.R +++ b/scratch/info.R @@ -4,7 +4,7 @@ pkgload::load_all() #src <- dm::dm_get_src(dm_financial()) #src <- test_src_maria() #src <- test_src_postgres() -#src <- test_src_mssql() +src <- test_src_mssql() # DuckDB doesn't have references #src <- test_src_duckdb() @@ -24,7 +24,7 @@ if (FALSE) { copy_dm_to(con, ., temporary = FALSE) } -DBI::dbListTables(con) +DBI::dbListTables(con, schema_name = "information_schema") obj <- DBI::dbListObjects(con) obj %>% filter(is_prefix) @@ -32,11 +32,14 @@ obj %>% filter(is_prefix) DBI::dbListObjects(con, DBI::Id(schema = "INFORMATION_SCHEMA")) info <- - dm_meta(con) + dm_meta(con, schema = 
"dbo") info %>% dm_draw() +info %>% + dm_get_tables() + info_local <- info %>% collect() From 82c9624a6597efff837826fb6fcaca1474f23b4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 29 Apr 2021 14:17:46 +0200 Subject: [PATCH 05/63] Keep old behavior for now --- R/learn.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/learn.R b/R/learn.R index 05e2fb85e..068bc39a6 100644 --- a/R/learn.R +++ b/R/learn.R @@ -47,9 +47,9 @@ dm_learn_from_db <- function(dest, dbname = NULL, ...) { return() } - if (!is_mssql(con)) { + #if (!is_mssql(con)) { return(dm_learn_from_db_legacy(dest, dbname, ...)) - } + #} dm_learn_from_db_meta(dest, catalog = dbname, ...) } From aed588492db30abf0ecdd67a582e8ea6241b852d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 29 Apr 2021 17:11:43 +0200 Subject: [PATCH 06/63] Add column selection, remove unneeded tables --- R/learn.R | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/R/learn.R b/R/learn.R index 068bc39a6..2871d72fe 100644 --- a/R/learn.R +++ b/R/learn.R @@ -60,24 +60,36 @@ dm_learn_from_db_meta <- function(dest, catalog = NULL, schema = NULL) { } dm_meta <- function(con, catalog = NULL, schema = NULL) { - schemata <- tbl_lc(con, dbplyr::ident_q("information_schema.schemata")) - tables <- tbl_lc(con, dbplyr::ident_q("information_schema.tables")) - views <- tbl_lc(con, dbplyr::ident_q("information_schema.views")) - columns <- tbl_lc(con, dbplyr::ident_q("information_schema.columns")) - table_constraints <- tbl_lc(con, dbplyr::ident_q("information_schema.table_constraints")) - referential_constraints <- tbl_lc(con, dbplyr::ident_q("information_schema.referential_constraints")) - key_column_usage <- tbl_lc(con, dbplyr::ident_q("information_schema.key_column_usage")) + schemata <- + tbl_lc(con, dbplyr::ident_q("information_schema.schemata")) %>% + select(catalog_name, schema_name) + + tables <- + 
tbl_lc(con, dbplyr::ident_q("information_schema.tables")) %>% + select(table_catalog, table_schema, table_name, table_type) + + columns <- + tbl_lc(con, dbplyr::ident_q("information_schema.columns")) %>% + select(table_catalog, table_schema, table_name, column_name, ordinal_position, column_default, is_nullable) + + table_constraints <- + tbl_lc(con, dbplyr::ident_q("information_schema.table_constraints")) %>% + select(constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, constraint_type) + + key_column_usage <- + tbl_lc(con, dbplyr::ident_q("information_schema.key_column_usage")) %>% + select(constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, column_name, ordinal_position, position_in_unique_constraint) # not on mariadb: - constraint_column_usage <- tbl_lc(con, dbplyr::ident_q("information_schema.constraint_column_usage")) + constraint_column_usage <- + tbl_lc(con, dbplyr::ident_q("information_schema.constraint_column_usage")) %>% + select(table_catalog, table_schema, table_name, column_name, constraint_catalog, constraint_schema, constraint_name) if (!is.null(catalog)) { schemata <- schemata %>% filter(catalog_name %in% !!catalog) tables <- tables %>% filter(table_catalog %in% !!catalog) - views <- views %>% filter(table_catalog %in% !!catalog) columns <- columns %>% filter(table_catalog %in% !!catalog) table_constraints <- table_constraints %>% filter(table_catalog %in% !!catalog) - referential_constraints <- referential_constraints %>% filter(table_catalog %in% !!catalog) key_column_usage <- key_column_usage %>% filter(table_catalog %in% !!catalog) constraint_column_usage <- constraint_column_usage %>% filter(table_catalog %in% !!catalog) } @@ -85,15 +97,13 @@ dm_meta <- function(con, catalog = NULL, schema = NULL) { if (!is.null(schema)) { schemata <- schemata %>% filter(schema_name %in% !!schema) tables <- tables %>% filter(table_schema %in% !!schema) - views <- views %>% 
filter(table_schema %in% !!schema) columns <- columns %>% filter(table_schema %in% !!schema) table_constraints <- table_constraints %>% filter(table_schema %in% !!schema) - referential_constraints <- referential_constraints %>% filter(table_schema %in% !!schema) key_column_usage <- key_column_usage %>% filter(table_schema %in% !!schema) constraint_column_usage <- constraint_column_usage %>% filter(table_schema %in% !!schema) } - info_raw <- dm(schemata, tables, columns, views, table_constraints, referential_constraints, key_column_usage, constraint_column_usage) + info_raw <- dm(schemata, tables, columns, table_constraints, key_column_usage, constraint_column_usage) info_raw info <- @@ -103,17 +113,11 @@ dm_meta <- function(con, catalog = NULL, schema = NULL) { dm_add_fk(tables, c(table_catalog, table_schema), schemata) %>% dm_add_pk(columns, c(table_catalog, table_schema, table_name, column_name)) %>% dm_add_fk(columns, c(table_catalog, table_schema, table_name), tables) %>% - dm_add_fk(views, c(table_catalog, table_schema, table_name), tables) %>% #dm_add_fk(table_constraints, table_schema, schemata) %>% dm_add_pk(table_constraints, c(constraint_catalog, constraint_schema, constraint_name)) %>% dm_add_fk(table_constraints, c(table_catalog, table_schema, table_name), tables) %>% # constraint_schema vs. table_schema? - # do we even need this? 
- dm_add_pk(referential_constraints, c(constraint_catalog, constraint_schema, constraint_name)) %>% - dm_add_fk(referential_constraints, c(constraint_catalog, constraint_schema), schemata) %>% - dm_select(referential_constraints, constraint_catalog, constraint_schema, constraint_name, everything()) %>% - # not on mssql: #dm_add_fk(referential_constraints, c(constraint_schema, table_name), tables) %>% #dm_add_fk(referential_constraints, c(constraint_schema, referenced_table_name), tables) %>% @@ -134,7 +138,7 @@ dm_meta <- function(con, catalog = NULL, schema = NULL) { # not on mariadb: dm_add_fk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name), table_constraints) %>% - dm_set_colors(brown = c(tables, columns, views), blue = schemata, green4 = ends_with("_constraints"), orange = ends_with("_usage")) + dm_set_colors(brown = c(tables, columns), blue = schemata, green4 = ends_with("_constraints"), orange = ends_with("_usage")) info } From 83eef21e43a1b112ab739af0bfc689bbe17cc271 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 29 Apr 2021 17:48:57 +0200 Subject: [PATCH 07/63] Select later --- R/learn.R | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/R/learn.R b/R/learn.R index 2871d72fe..5ffc4f5a7 100644 --- a/R/learn.R +++ b/R/learn.R @@ -61,29 +61,36 @@ dm_learn_from_db_meta <- function(dest, catalog = NULL, schema = NULL) { dm_meta <- function(con, catalog = NULL, schema = NULL) { schemata <- - tbl_lc(con, dbplyr::ident_q("information_schema.schemata")) %>% - select(catalog_name, schema_name) + tbl_lc(con, dbplyr::ident_q("information_schema.schemata"))# %>% + #select(catalog_name, schema_name) tables <- - tbl_lc(con, dbplyr::ident_q("information_schema.tables")) %>% - select(table_catalog, table_schema, table_name, table_type) + tbl_lc(con, dbplyr::ident_q("information_schema.tables"))# %>% + #select(table_catalog, table_schema, table_name, 
table_type) columns <- - tbl_lc(con, dbplyr::ident_q("information_schema.columns")) %>% - select(table_catalog, table_schema, table_name, column_name, ordinal_position, column_default, is_nullable) + tbl_lc(con, dbplyr::ident_q("information_schema.columns"))# %>% + #select(table_catalog, table_schema, table_name, column_name, ordinal_position, column_default, is_nullable) table_constraints <- - tbl_lc(con, dbplyr::ident_q("information_schema.table_constraints")) %>% - select(constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, constraint_type) + tbl_lc(con, dbplyr::ident_q("information_schema.table_constraints"))# %>% + #select(constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, constraint_type) key_column_usage <- - tbl_lc(con, dbplyr::ident_q("information_schema.key_column_usage")) %>% - select(constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, column_name, ordinal_position, position_in_unique_constraint) + tbl_lc(con, dbplyr::ident_q("information_schema.key_column_usage"))# %>% + # position_in_unique_constraint: removed, can't be part of a primary key + #select(constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, column_name, ordinal_position) # not on mariadb: constraint_column_usage <- tbl_lc(con, dbplyr::ident_q("information_schema.constraint_column_usage")) %>% - select(table_catalog, table_schema, table_name, column_name, constraint_catalog, constraint_schema, constraint_name) + + # Postgres: + group_by(constraint_catalog, constraint_schema, constraint_name) %>% + mutate(ordinal_position = row_number()) %>% + ungroup() + # %>% + #select(table_catalog, table_schema, table_name, column_name, constraint_catalog, constraint_schema, constraint_name) if (!is.null(catalog)) { schemata <- schemata %>% filter(catalog_name %in% !!catalog) @@ -122,21 +129,14 @@ dm_meta <- function(con, 
catalog = NULL, schema = NULL) { #dm_add_fk(referential_constraints, c(constraint_schema, table_name), tables) %>% #dm_add_fk(referential_constraints, c(constraint_schema, referenced_table_name), tables) %>% + dm_add_pk(key_column_usage, c(constraint_catalog, constraint_schema, constraint_name, ordinal_position)) %>% dm_add_fk(key_column_usage, c(table_catalog, table_schema, table_name, column_name), columns) %>% - - # not on mssql: - #dm_add_fk(key_column_usage, c(referenced_table_schema, referenced_table_name, referenced_column_name), columns) %>% - dm_add_fk(key_column_usage, c(constraint_catalog, constraint_schema, constraint_name), table_constraints) %>% # not on mariadb; dm_add_fk(constraint_column_usage, c(table_catalog, table_schema, table_name, column_name), columns) %>% - - # not on mssql: - #dm_add_fk(constraint_column_usage, c(referenced_table_schema, referenced_table_name, referenced_column_name), columns) %>% - - # not on mariadb: dm_add_fk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name), table_constraints) %>% + dm_add_fk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name, ordinal_position), key_column_usage) %>% dm_set_colors(brown = c(tables, columns), blue = schemata, green4 = ends_with("_constraints"), orange = ends_with("_usage")) From f254e6e306e753d574c96f6cc5d29068f2432a11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 29 Apr 2021 17:50:06 +0200 Subject: [PATCH 08/63] Postgres, PK and FK --- scratch/info.R | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/scratch/info.R b/scratch/info.R index 67ff0884c..59812f8ad 100644 --- a/scratch/info.R +++ b/scratch/info.R @@ -3,8 +3,8 @@ pkgload::load_all() #src <- dm::dm_get_src(dm_financial()) #src <- test_src_maria() -#src <- test_src_postgres() -src <- test_src_mssql() +src <- test_src_postgres() +#src <- test_src_mssql() # DuckDB doesn't have references #src 
<- test_src_duckdb() @@ -32,7 +32,7 @@ obj %>% filter(is_prefix) DBI::dbListObjects(con, DBI::Id(schema = "INFORMATION_SCHEMA")) info <- - dm_meta(con, schema = "dbo") + dm_meta(con) info %>% dm_draw() @@ -40,6 +40,29 @@ info %>% info %>% dm_get_tables() +pk <- + info %>% + dm_zoom_to(table_constraints) %>% + filter(constraint_type == "PRIMARY KEY") %>% + dm_update_zoomed() %>% + dm_zoom_to(key_column_usage) %>% + semi_join(table_constraints) + +pk + +fk <- + info %>% + dm_zoom_to(table_constraints) %>% + filter(constraint_type == "FOREIGN KEY") %>% + dm_update_zoomed() %>% + dm_zoom_to(constraint_column_usage) %>% + semi_join(table_constraints) %>% + rename(fk_table_catalog = table_catalog, fk_table_schema = table_schema, fk_table_name = table_name, fk_column_name = column_name) %>% + left_join(key_column_usage) %>% + rename(pk_table_catalog = table_catalog, pk_table_schema = table_schema, pk_table_name = table_name, pk_column_name = column_name) + +fk + info_local <- info %>% collect() From 1d5f0bcd0e86014eedc3225d5f485153196e5ef3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 29 Apr 2021 18:04:43 +0200 Subject: [PATCH 09/63] Split function --- R/learn.R | 92 +++++++++++++++++++++++++++---------------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/R/learn.R b/R/learn.R index 5ffc4f5a7..6d4ffc768 100644 --- a/R/learn.R +++ b/R/learn.R @@ -60,26 +60,18 @@ dm_learn_from_db_meta <- function(dest, catalog = NULL, schema = NULL) { } dm_meta <- function(con, catalog = NULL, schema = NULL) { - schemata <- - tbl_lc(con, dbplyr::ident_q("information_schema.schemata"))# %>% - #select(catalog_name, schema_name) - - tables <- - tbl_lc(con, dbplyr::ident_q("information_schema.tables"))# %>% - #select(table_catalog, table_schema, table_name, table_type) - - columns <- - tbl_lc(con, dbplyr::ident_q("information_schema.columns"))# %>% - #select(table_catalog, table_schema, table_name, column_name, ordinal_position, 
column_default, is_nullable) - - table_constraints <- - tbl_lc(con, dbplyr::ident_q("information_schema.table_constraints"))# %>% - #select(constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, constraint_type) + con %>% + dm_meta_raw() %>% + select_dm_meta() %>% + filter_dm_meta(catalog, schema) +} - key_column_usage <- - tbl_lc(con, dbplyr::ident_q("information_schema.key_column_usage"))# %>% - # position_in_unique_constraint: removed, can't be part of a primary key - #select(constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, column_name, ordinal_position) +dm_meta_raw <- function(con) { + schemata <- tbl_lc(con, dbplyr::ident_q("information_schema.schemata"))# %>% + tables <- tbl_lc(con, dbplyr::ident_q("information_schema.tables"))# %>% + columns <- tbl_lc(con, dbplyr::ident_q("information_schema.columns"))# %>% + table_constraints <- tbl_lc(con, dbplyr::ident_q("information_schema.table_constraints"))# %>% + key_column_usage <- tbl_lc(con, dbplyr::ident_q("information_schema.key_column_usage"))# %>% # not on mariadb: constraint_column_usage <- @@ -89,32 +81,8 @@ dm_meta <- function(con, catalog = NULL, schema = NULL) { group_by(constraint_catalog, constraint_schema, constraint_name) %>% mutate(ordinal_position = row_number()) %>% ungroup() - # %>% - #select(table_catalog, table_schema, table_name, column_name, constraint_catalog, constraint_schema, constraint_name) - - if (!is.null(catalog)) { - schemata <- schemata %>% filter(catalog_name %in% !!catalog) - tables <- tables %>% filter(table_catalog %in% !!catalog) - columns <- columns %>% filter(table_catalog %in% !!catalog) - table_constraints <- table_constraints %>% filter(table_catalog %in% !!catalog) - key_column_usage <- key_column_usage %>% filter(table_catalog %in% !!catalog) - constraint_column_usage <- constraint_column_usage %>% filter(table_catalog %in% !!catalog) - } - - if (!is.null(schema)) { - schemata <- 
schemata %>% filter(schema_name %in% !!schema) - tables <- tables %>% filter(table_schema %in% !!schema) - columns <- columns %>% filter(table_schema %in% !!schema) - table_constraints <- table_constraints %>% filter(table_schema %in% !!schema) - key_column_usage <- key_column_usage %>% filter(table_schema %in% !!schema) - constraint_column_usage <- constraint_column_usage %>% filter(table_schema %in% !!schema) - } - - info_raw <- dm(schemata, tables, columns, table_constraints, key_column_usage, constraint_column_usage) - info_raw - info <- - info_raw %>% + dm(schemata, tables, columns, table_constraints, key_column_usage, constraint_column_usage) %>% dm_add_pk(schemata, c(catalog_name, schema_name)) %>% dm_add_pk(tables, c(table_catalog, table_schema, table_name)) %>% dm_add_fk(tables, c(table_catalog, table_schema), schemata) %>% @@ -139,8 +107,6 @@ dm_meta <- function(con, catalog = NULL, schema = NULL) { dm_add_fk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name, ordinal_position), key_column_usage) %>% dm_set_colors(brown = c(tables, columns), blue = schemata, green4 = ends_with("_constraints"), orange = ends_with("_usage")) - - info } tbl_lc <- function(con, name) { @@ -153,6 +119,40 @@ tbl_lc <- function(con, name) { out %>% rename(!!!set_names(syms(names), names_lc)) } +select_dm_meta <- function(dm_meta) { + dm_meta %>% + dm_select(schemata, catalog_name, schema_name) %>% + dm_select(tables, table_catalog, table_schema, table_name, table_type) %>% + dm_select(columns, table_catalog, table_schema, table_name, column_name, ordinal_position, column_default, is_nullable) %>% + dm_select(table_constraints, constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, constraint_type) %>% + dm_select(key_column_usage, constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, column_name, ordinal_position) %>% + dm_select(constraint_column_usage, 
table_catalog, table_schema, table_name, column_name, constraint_catalog, constraint_schema, constraint_name) +} + +filter_dm_meta <- function(dm_meta, catalog = NULL, schema = NULL) { + if (!is.null(catalog)) { + FIXME + schemata <- schemata %>% filter(catalog_name %in% !!catalog) + tables <- tables %>% filter(table_catalog %in% !!catalog) + columns <- columns %>% filter(table_catalog %in% !!catalog) + table_constraints <- table_constraints %>% filter(table_catalog %in% !!catalog) + key_column_usage <- key_column_usage %>% filter(table_catalog %in% !!catalog) + constraint_column_usage <- constraint_column_usage %>% filter(table_catalog %in% !!catalog) + } + + if (!is.null(schema)) { + FIXME + schemata <- schemata %>% filter(schema_name %in% !!schema) + tables <- tables %>% filter(table_schema %in% !!schema) + columns <- columns %>% filter(table_schema %in% !!schema) + table_constraints <- table_constraints %>% filter(table_schema %in% !!schema) + key_column_usage <- key_column_usage %>% filter(table_schema %in% !!schema) + constraint_column_usage <- constraint_column_usage %>% filter(table_schema %in% !!schema) + } + + dm_meta +} + dm_learn_from_db_legacy <- function(dest, dbname, ...) { sql <- db_learn_query(con, dbname = dbname, ...) 
if (is.null(sql)) { From 44c9205020932f28640d4c6fcce66a18e709f826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Fri, 30 Apr 2021 06:06:28 +0200 Subject: [PATCH 10/63] key_dm --- scratch/info.R | 166 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 145 insertions(+), 21 deletions(-) diff --git a/scratch/info.R b/scratch/info.R index 59812f8ad..c32356385 100644 --- a/scratch/info.R +++ b/scratch/info.R @@ -40,41 +40,165 @@ info %>% info %>% dm_get_tables() -pk <- +info_local <- info %>% + collect() + +quote_fq_schema <- function(con, catalog, schema) { + if (is_postgres(con) || is_mssql(con)) { + catalog <- dbQuoteIdentifier(con, catalog) + schema <- dbQuoteIdentifier(con, schema) + paste0(catalog, ".", schema) + } else { + bla + } +} + +quote_fq_table <- function(con, fq_schema, table) { + table <- dbQuoteIdentifier(con, table) + paste0(fq_schema, ".", table) +} + +# quote_fq_column <- function(con, fq_table, column) { +# table <- dbQuoteIdentifier(con, column) +# paste0(fq_table, ".", column) +# } + +fq_r_table_if_needed <- function(catalog, schema, table) { + fq <- tibble(catalog, schema, table) + + fq %>% + group_by(table) %>% + mutate(n = n()) %>% + ungroup() %>% + mutate(fq_table = if_else(n > 1, fq_r_table(catalog, schema, table), table)) %>% + pull() +} + +fq_r_table <- function(catalog, schema, table) { + if (length(unique(catalog)) > 1) { + catalog <- paste0(catalog, ".") + } else { + catalog <- "" + } + + if (length(unique(schema)) > 1) { + schema <- paste0(schema, ".") + } else { + schema <- "" + } + + paste0(catalog, schema, table) +} + +info_local_named <- + info_local %>% + + # FIXME: Simplify with rekey, https://github.com/cynkra/dm/issues/519 + dm_zoom_to(schemata) %>% + mutate(fq_schema_name = quote_fq_schema(!!con, catalog_name, schema_name), .before = catalog_name) %>% + dm_update_zoomed() %>% + + dm_zoom_to(tables) %>% + left_join(schemata, select = fq_schema_name) %>% + mutate(fq_table_name = 
quote_fq_table(!!con, fq_schema_name, table_name), .before = table_catalog) %>% + mutate(r_table_name = fq_r_table_if_needed(table_catalog, table_schema, table_name)) %>% + dm_update_zoomed() %>% + dm_zoom_to(table_constraints) %>% - filter(constraint_type == "PRIMARY KEY") %>% + left_join(tables, select = fq_table_name) %>% + mutate(fq_constraint_name = quote_fq_table(!!con, quote_fq_schema(!!con, constraint_catalog, constraint_schema), constraint_name), .before = constraint_catalog) %>% + select(fq_constraint_name, fq_table_name, everything()) %>% + dm_update_zoomed() %>% + + dm_zoom_to(columns) %>% + left_join(tables, select = fq_table_name) %>% + #mutate(fq_column_name = quote_fq_column(!!con, fq_table_name, column_name), .before = column_name) %>% + select(fq_table_name, everything()) %>% + #select(fq_table_name, fq_column_name, everything()) %>% dm_update_zoomed() %>% + dm_zoom_to(key_column_usage) %>% - semi_join(table_constraints) + left_join(columns, select = fq_table_name) %>% + left_join(table_constraints, select = fq_constraint_name) %>% + select(fq_constraint_name, fq_table_name, everything()) %>% + dm_update_zoomed() %>% -pk + dm_zoom_to(constraint_column_usage) %>% + left_join(columns, select = fq_table_name) %>% + left_join(table_constraints, select = fq_constraint_name) %>% + select(fq_constraint_name, fq_table_name, everything()) %>% + dm_update_zoomed() + +info_simple <- + dm(!!!dm_get_tables(info_local_named)) %>% + dm_add_pk(schemata, fq_schema_name) %>% + dm_add_pk(tables, fq_table_name) %>% + dm_add_fk(tables, fq_schema_name, schemata) %>% + dm_add_pk(columns, c(fq_table_name, column_name)) %>% + dm_add_fk(columns, fq_table_name, tables) %>% + #dm_add_fk(table_constraints, table_schema, schemata) %>% + dm_add_pk(table_constraints, fq_constraint_name) %>% + dm_add_fk(table_constraints, fq_table_name, tables) %>% + # constraint_schema vs. table_schema? 
+ + # not on mssql: + #dm_add_fk(referential_constraints, c(constraint_schema, table_name), tables) %>% + #dm_add_fk(referential_constraints, c(constraint_schema, referenced_table_name), tables) %>% + + dm_add_pk(key_column_usage, c(fq_constraint_name, ordinal_position)) %>% + dm_add_fk(key_column_usage, c(fq_table_name, column_name), columns) %>% + dm_add_fk(key_column_usage, fq_constraint_name, table_constraints) %>% + + # not on mariadb; + dm_add_pk(constraint_column_usage, c(fq_constraint_name, ordinal_position)) %>% + dm_add_fk(constraint_column_usage, c(fq_table_name, column_name), columns) %>% + dm_add_fk(constraint_column_usage, fq_constraint_name, table_constraints) %>% + dm_add_fk(constraint_column_usage, c(fq_constraint_name, ordinal_position), key_column_usage) %>% + + dm_select(columns, -c(table_catalog, table_schema, table_name)) %>% + dm_select(table_constraints, -c(table_catalog, table_schema, table_name)) %>% + dm_select(key_column_usage, -c(table_catalog, table_schema, table_name)) %>% + dm_select(key_column_usage, -c(constraint_catalog, constraint_schema, constraint_name)) %>% + dm_select(constraint_column_usage, -c(table_catalog, table_schema, table_name)) %>% + dm_select(constraint_column_usage, -c(constraint_catalog, constraint_schema, constraint_name)) %>% + dm_set_colors(brown = c(tables, columns), blue = schemata, green4 = ends_with("_constraints"), orange = ends_with("_usage")) + +info_simple %>% + dm_draw() + +key_dm <- + info_simple %>% + + dm_zoom_to(table_constraints) %>% + filter(constraint_type == "PRIMARY KEY") %>% + dm_insert_zoomed("pk_constraints") %>% + dm_zoom_to(key_column_usage) %>% + semi_join(pk_constraints) %>% + dm_insert_zoomed("pk") %>% -fk <- - info %>% dm_zoom_to(table_constraints) %>% filter(constraint_type == "FOREIGN KEY") %>% dm_update_zoomed() %>% dm_zoom_to(constraint_column_usage) %>% semi_join(table_constraints) %>% - rename(fk_table_catalog = table_catalog, fk_table_schema = table_schema, fk_table_name = 
table_name, fk_column_name = column_name) %>% + rename(fk_fq_table_name = fq_table_name, fk_column_name = column_name) %>% left_join(key_column_usage) %>% - rename(pk_table_catalog = table_catalog, pk_table_schema = table_schema, pk_table_name = table_name, pk_column_name = column_name) + rename(pk_fq_table_name = fq_table_name, pk_column_name = column_name) %>% -fk - -info_local <- - info %>% - collect() + # Postgres: Can return int64 here + mutate(ordinal_position = as.integer(ordinal_position)) %>% + dm_insert_zoomed("fk") %>% + dm_add_fk(fk, c(pk_fq_table_name, pk_column_name), columns) %>% -info_local %>% - dm_nrow() + dm_zoom_to(columns) %>% + left_join(tables, select = r_table_name) %>% + dm_update_zoomed() %>% -info_local$TABLE_CONSTRAINTS %>% - filter(TABLE_SCHEMA == "Financial_ijs") + dm_select_tbl(columns, pk, fk) -info_local$TABLE_CONSTRAINTS %>% - count(CONSTRAINT_TYPE) +key_dm %>% + dm_draw() -info_local$REFERENTIAL_CONSTRAINTS %>% - filter(CONSTRAINT_SCHEMA == "Financial_ijs") +key_dm %>% + dm_get_tables() From 2551eea7f45c31373daf4f01198f5ebd0929d7af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Fri, 30 Apr 2021 06:10:45 +0200 Subject: [PATCH 11/63] More keys --- R/learn.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/learn.R b/R/learn.R index 6d4ffc768..00036f796 100644 --- a/R/learn.R +++ b/R/learn.R @@ -102,6 +102,7 @@ dm_meta_raw <- function(con) { dm_add_fk(key_column_usage, c(constraint_catalog, constraint_schema, constraint_name), table_constraints) %>% # not on mariadb; + dm_add_pk(constraint_column_usage, c(table_catalog, table_schema, table_name, column_name, ordinal_position)) %>% dm_add_fk(constraint_column_usage, c(table_catalog, table_schema, table_name, column_name), columns) %>% dm_add_fk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name), table_constraints) %>% dm_add_fk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name, 
ordinal_position), key_column_usage) %>% @@ -126,7 +127,7 @@ select_dm_meta <- function(dm_meta) { dm_select(columns, table_catalog, table_schema, table_name, column_name, ordinal_position, column_default, is_nullable) %>% dm_select(table_constraints, constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, constraint_type) %>% dm_select(key_column_usage, constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, column_name, ordinal_position) %>% - dm_select(constraint_column_usage, table_catalog, table_schema, table_name, column_name, constraint_catalog, constraint_schema, constraint_name) + dm_select(constraint_column_usage, table_catalog, table_schema, table_name, column_name, constraint_catalog, constraint_schema, constraint_name, ordinal_position) } filter_dm_meta <- function(dm_meta, catalog = NULL, schema = NULL) { From 161e45fd054c11547182355af333882a940c15bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Fri, 30 Apr 2021 06:11:01 +0200 Subject: [PATCH 12/63] Now try MSSQL --- scratch/info.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scratch/info.R b/scratch/info.R index c32356385..b611e56bb 100644 --- a/scratch/info.R +++ b/scratch/info.R @@ -3,8 +3,8 @@ pkgload::load_all() #src <- dm::dm_get_src(dm_financial()) #src <- test_src_maria() -src <- test_src_postgres() -#src <- test_src_mssql() +#src <- test_src_postgres() +src <- test_src_mssql() # DuckDB doesn't have references #src <- test_src_duckdb() From 7d19bf8046b2d308d5e86fc38761e1df1edf8680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Fri, 30 Apr 2021 06:14:38 +0200 Subject: [PATCH 13/63] Special-case Postgres --- R/learn.R | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/R/learn.R b/R/learn.R index 00036f796..32649150d 100644 --- a/R/learn.R +++ b/R/learn.R @@ -75,12 +75,15 @@ dm_meta_raw <- function(con) { # not on 
mariadb: constraint_column_usage <- - tbl_lc(con, dbplyr::ident_q("information_schema.constraint_column_usage")) %>% - - # Postgres: - group_by(constraint_catalog, constraint_schema, constraint_name) %>% - mutate(ordinal_position = row_number()) %>% - ungroup() + tbl_lc(con, dbplyr::ident_q("information_schema.constraint_column_usage")) + + if (is_postgres(con)) { + constraint_column_usage <- + constraint_column_usage %>% + group_by(constraint_catalog, constraint_schema, constraint_name) %>% + mutate(ordinal_position = row_number()) %>% + ungroup() + } dm(schemata, tables, columns, table_constraints, key_column_usage, constraint_column_usage) %>% dm_add_pk(schemata, c(catalog_name, schema_name)) %>% From f569478a797d8496f67b8d9a649994d88a6d3ea3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Fri, 30 Apr 2021 08:42:34 +0200 Subject: [PATCH 14/63] Works --- R/learn.R | 20 ++++++++++++++++++- scratch/info.R | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/R/learn.R b/R/learn.R index 32649150d..5dd10e612 100644 --- a/R/learn.R +++ b/R/learn.R @@ -78,11 +78,29 @@ dm_meta_raw <- function(con) { tbl_lc(con, dbplyr::ident_q("information_schema.constraint_column_usage")) if (is_postgres(con)) { + fkc <- + table_constraints %>% + select(constraint_catalog, constraint_schema, constraint_name, constraint_type) %>% + filter(constraint_type == "FOREIGN KEY") + constraint_column_usage <- constraint_column_usage %>% group_by(constraint_catalog, constraint_schema, constraint_name) %>% mutate(ordinal_position = row_number()) %>% - ungroup() + ungroup() %>% + semi_join(fkc, by = c("constraint_catalog", "constraint_schema", "constraint_name")) + + # FIXME: Also has `position_in_unique_constraint`, used elsewhere? 
+ } else if (is_mssql(con)) { + fkc <- + table_constraints %>% + select(constraint_catalog, constraint_schema, constraint_name, constraint_type) %>% + filter(constraint_type == "FOREIGN KEY") + + constraint_column_usage <- + constraint_column_usage %>% + semi_join(fkc, by = c("constraint_catalog", "constraint_schema", "constraint_name")) %>% + mutate(ordinal_position = NA_integer_) } dm(schemata, tables, columns, table_constraints, key_column_usage, constraint_column_usage) %>% diff --git a/scratch/info.R b/scratch/info.R index b611e56bb..6ca93fc5a 100644 --- a/scratch/info.R +++ b/scratch/info.R @@ -24,6 +24,59 @@ if (FALSE) { copy_dm_to(con, ., temporary = FALSE) } +src_pg <- test_src_postgres() +con_pg <- src_pg$con + +mssql_sys_db <- function(con, dbname, name) { + tbl(con, dbplyr::ident_q(paste0(dbname, ".", name))) %>% + mutate(catalog = !!dbname) %>% + select(catalog, everything()) +} + +mssql_sys_all_db <- function(con, name) { + # https://stackoverflow.com/a/9767471/946850 + databases <- + tbl(con, dbplyr::ident_q("sys.databases")) %>% + select(name) %>% + collect() %>% + pull() + + # FIXME: All databases + databases <- "test" + + lazy <- map(databases, ~tryCatch(mssql_sys_db(con, .x, name), error = function(e) NULL)) + reduce(compact(lazy), union_all) +} + +fkc <- mssql_sys_all_db(con, "sys.foreign_key_columns") +objects <- mssql_sys_all_db(con, "sys.objects") +tables <- mssql_sys_all_db(con, "sys.tables") +columns <- mssql_sys_all_db(con, "sys.columns") +schemas <- mssql_sys_all_db(con, "sys.schemas") + +sys_fkc_column_usage <- + fkc %>% + rename(ordinal_position = constraint_column_id) %>% + left_join(columns %>% select(catalog = catalog, column_name = name, object_id, column_id), by = c("catalog", "parent_object_id" = "object_id", "parent_column_id" = "column_id")) %>% + select(-parent_column_id) %>% + left_join(tables %>% select(catalog = catalog, schema_id, table_name = name, object_id), by = c("catalog", "parent_object_id" = "object_id")) %>% + 
select(-parent_object_id) %>% + left_join(schemas %>% select(catalog = catalog, schema_id, table_schema = name), by = c("catalog", "schema_id")) %>% + select(-schema_id) %>% + left_join(objects %>% select(constraint_name = name, object_id), by = c("constraint_object_id" = "object_id")) %>% + select(-constraint_object_id) %>% + + transmute(constraint_catalog = catalog, constraint_schema = table_schema, constraint_name, table_schema, table_name, column_name, ordinal_position) + +dm_meta(con)$constraint_column_usage %>% + select(-table_schema, -table_name, -ordinal_position) %>% + left_join(sys_fkc_column_usage) + +dm_meta(con)$key_column_usage +dm_meta(con_pg)$key_column_usage +dm_meta(con)$constraint_column_usage +dm_meta(con_pg)$constraint_column_usage + DBI::dbListTables(con, schema_name = "information_schema") obj <- DBI::dbListObjects(con) From a5f37cf133264145b871eee4dea5fb248d2ee0dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Fri, 30 Apr 2021 09:31:04 +0200 Subject: [PATCH 15/63] Works now --- R/learn.R | 38 ++++++++++++++------------------------ scratch/info.R | 48 +++++------------------------------------------- 2 files changed, 19 insertions(+), 67 deletions(-) diff --git a/R/learn.R b/R/learn.R index 5dd10e612..ac9dd35cb 100644 --- a/R/learn.R +++ b/R/learn.R @@ -61,46 +61,36 @@ dm_learn_from_db_meta <- function(dest, catalog = NULL, schema = NULL) { dm_meta <- function(con, catalog = NULL, schema = NULL) { con %>% - dm_meta_raw() %>% + dm_meta_raw(catalog) %>% select_dm_meta() %>% filter_dm_meta(catalog, schema) } -dm_meta_raw <- function(con) { - schemata <- tbl_lc(con, dbplyr::ident_q("information_schema.schemata"))# %>% - tables <- tbl_lc(con, dbplyr::ident_q("information_schema.tables"))# %>% - columns <- tbl_lc(con, dbplyr::ident_q("information_schema.columns"))# %>% - table_constraints <- tbl_lc(con, dbplyr::ident_q("information_schema.table_constraints"))# %>% - key_column_usage <- tbl_lc(con, 
dbplyr::ident_q("information_schema.key_column_usage"))# %>% +dm_meta_raw <- function(con, catalog) { + src <- src_from_src_or_con(con) - # not on mariadb: - constraint_column_usage <- - tbl_lc(con, dbplyr::ident_q("information_schema.constraint_column_usage")) + schemata <- tbl_lc(src, dbplyr::ident_q("information_schema.schemata")) + tables <- tbl_lc(src, dbplyr::ident_q("information_schema.tables")) + columns <- tbl_lc(src, dbplyr::ident_q("information_schema.columns")) + table_constraints <- tbl_lc(src, dbplyr::ident_q("information_schema.table_constraints")) + key_column_usage <- tbl_lc(src, dbplyr::ident_q("information_schema.key_column_usage")) - if (is_postgres(con)) { - fkc <- + if (is_postgres(src)) { + info_fkc <- table_constraints %>% select(constraint_catalog, constraint_schema, constraint_name, constraint_type) %>% filter(constraint_type == "FOREIGN KEY") constraint_column_usage <- - constraint_column_usage %>% + tbl_lc(src, dbplyr::ident_q("information_schema.constraint_column_usage")) %>% group_by(constraint_catalog, constraint_schema, constraint_name) %>% mutate(ordinal_position = row_number()) %>% ungroup() %>% - semi_join(fkc, by = c("constraint_catalog", "constraint_schema", "constraint_name")) + semi_join(info_fkc, by = c("constraint_catalog", "constraint_schema", "constraint_name")) # FIXME: Also has `position_in_unique_constraint`, used elsewhere? 
- } else if (is_mssql(con)) { - fkc <- - table_constraints %>% - select(constraint_catalog, constraint_schema, constraint_name, constraint_type) %>% - filter(constraint_type == "FOREIGN KEY") - - constraint_column_usage <- - constraint_column_usage %>% - semi_join(fkc, by = c("constraint_catalog", "constraint_schema", "constraint_name")) %>% - mutate(ordinal_position = NA_integer_) + } else if (is_mssql(src)) { + constraint_column_usage <- mssql_constraint_column_usage(src, table_constraints, catalog) } dm(schemata, tables, columns, table_constraints, key_column_usage, constraint_column_usage) %>% diff --git a/scratch/info.R b/scratch/info.R index 6ca93fc5a..3b70d8b25 100644 --- a/scratch/info.R +++ b/scratch/info.R @@ -27,56 +27,18 @@ if (FALSE) { src_pg <- test_src_postgres() con_pg <- src_pg$con -mssql_sys_db <- function(con, dbname, name) { - tbl(con, dbplyr::ident_q(paste0(dbname, ".", name))) %>% - mutate(catalog = !!dbname) %>% - select(catalog, everything()) -} - -mssql_sys_all_db <- function(con, name) { - # https://stackoverflow.com/a/9767471/946850 - databases <- - tbl(con, dbplyr::ident_q("sys.databases")) %>% - select(name) %>% - collect() %>% - pull() +dm_meta(con) +mssql_constraint_column_usage(con, tbl_lc(con, dbplyr::ident_q("information_schema.table_constraints")), "test") - # FIXME: All databases - databases <- "test" - - lazy <- map(databases, ~tryCatch(mssql_sys_db(con, .x, name), error = function(e) NULL)) - reduce(compact(lazy), union_all) -} - -fkc <- mssql_sys_all_db(con, "sys.foreign_key_columns") -objects <- mssql_sys_all_db(con, "sys.objects") -tables <- mssql_sys_all_db(con, "sys.tables") -columns <- mssql_sys_all_db(con, "sys.columns") -schemas <- mssql_sys_all_db(con, "sys.schemas") - -sys_fkc_column_usage <- - fkc %>% - rename(ordinal_position = constraint_column_id) %>% - left_join(columns %>% select(catalog = catalog, column_name = name, object_id, column_id), by = c("catalog", "parent_object_id" = "object_id", "parent_column_id" = 
"column_id")) %>% - select(-parent_column_id) %>% - left_join(tables %>% select(catalog = catalog, schema_id, table_name = name, object_id), by = c("catalog", "parent_object_id" = "object_id")) %>% - select(-parent_object_id) %>% - left_join(schemas %>% select(catalog = catalog, schema_id, table_schema = name), by = c("catalog", "schema_id")) %>% - select(-schema_id) %>% - left_join(objects %>% select(constraint_name = name, object_id), by = c("constraint_object_id" = "object_id")) %>% - select(-constraint_object_id) %>% - - transmute(constraint_catalog = catalog, constraint_schema = table_schema, constraint_name, table_schema, table_name, column_name, ordinal_position) - -dm_meta(con)$constraint_column_usage %>% - select(-table_schema, -table_name, -ordinal_position) %>% - left_join(sys_fkc_column_usage) +asdf dm_meta(con)$key_column_usage dm_meta(con_pg)$key_column_usage dm_meta(con)$constraint_column_usage dm_meta(con_pg)$constraint_column_usage +asdf + DBI::dbListTables(con, schema_name = "information_schema") obj <- DBI::dbListObjects(con) From 3820f7a865a09938eb6d5f47ca18393ef15ee1c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Fri, 30 Apr 2021 09:37:26 +0200 Subject: [PATCH 16/63] NA catalog --- R/learn.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/learn.R b/R/learn.R index ac9dd35cb..17eb91222 100644 --- a/R/learn.R +++ b/R/learn.R @@ -59,7 +59,7 @@ dm_learn_from_db_meta <- function(dest, catalog = NULL, schema = NULL) { info } -dm_meta <- function(con, catalog = NULL, schema = NULL) { +dm_meta <- function(con, catalog = NA, schema = NULL) { con %>% dm_meta_raw(catalog) %>% select_dm_meta() %>% @@ -142,7 +142,7 @@ select_dm_meta <- function(dm_meta) { } filter_dm_meta <- function(dm_meta, catalog = NULL, schema = NULL) { - if (!is.null(catalog)) { + if (!is.null(catalog) && !is.na(catalog)) { FIXME schemata <- schemata %>% filter(catalog_name %in% !!catalog) tables <- tables %>% filter(table_catalog %in% 
!!catalog) From f8e889b169e32944dc76fb2037139c8f258768f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Fri, 30 Apr 2021 09:37:38 +0200 Subject: [PATCH 17/63] Forgot --- R/mssql.R | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ R/zzz.R | 2 +- 2 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 R/mssql.R diff --git a/R/mssql.R b/R/mssql.R new file mode 100644 index 000000000..b4fd70509 --- /dev/null +++ b/R/mssql.R @@ -0,0 +1,77 @@ +mssql_sys_db <- function(con, dbname, name) { + if (is.na(dbname)) { + fq_name <- name + sql_name <- sql("DB_NAME()") + } else { + fq_name <- paste0(dbname, ".", name) + sql_name <- dbname + } + tbl(con, dbplyr::ident_q(fq_name)) %>% + mutate(catalog = !!sql_name) %>% + select(catalog, everything()) +} + +mssql_sys_all_db <- function(con, dbname, name, warn = FALSE) { + lazy <- map(dbname, ~tryCatch( + mssql_sys_db(con, .x, name), + error = function(e) { + if (warn) { + warn(paste0("Can't access database ", .x, ": ", conditionMessage(e))) + } + NULL + } + )) + reduce(compact(lazy), union_all) +} + +mssql_sys_databases <- function(dbname) { + if (is.null(dbname)) { + dbname <- + tbl(con, dbplyr::ident_q("sys.databases")) %>% + select(name) %>% + collect() %>% + pull() + } else if (is.na(dbname)) { + dbname <- NA_character_ + } else { + stopifnot(is.character(dbname)) + } + dbname +} + +mssql_constraint_column_usage <- function(con, table_constraints, dbname) { + dbname <- mssql_sys_databases(dbname) + + info_fkc <- + table_constraints %>% + select(constraint_catalog, constraint_schema, constraint_name, constraint_type) %>% + filter(constraint_type == "FOREIGN KEY") + + fkc <- + mssql_sys_all_db(con, dbname, "sys.foreign_key_columns", warn = TRUE) + columns <- + mssql_sys_all_db(con, dbname, "sys.columns") %>% + select(catalog = catalog, column_name = name, object_id, column_id) + tables <- + mssql_sys_all_db(con, dbname, "sys.tables") %>% + select(catalog = catalog, schema_id, table_name 
= name, object_id) + schemas <- + mssql_sys_all_db(con, dbname, "sys.schemas") %>% + select(catalog = catalog, schema_id, table_schema = name) + objects <- + mssql_sys_all_db(con, dbname, "sys.objects") %>% + select(constraint_name = name, object_id) + + sys_fkc_column_usage <- + fkc %>% + left_join(columns, by = c("catalog", "parent_object_id" = "object_id", "parent_column_id" = "column_id")) %>% + left_join(tables, by = c("catalog", "referenced_object_id" = "object_id")) %>% + left_join(schemas, by = c("catalog", "schema_id")) %>% + left_join(objects, by = c("constraint_object_id" = "object_id")) %>% + transmute(constraint_catalog = catalog, constraint_schema = table_schema, constraint_name, table_schema, table_name, column_name, ordinal_position = constraint_column_id) + + tbl_lc(con, dbplyr::ident_q("information_schema.constraint_column_usage")) %>% + semi_join(info_fkc, by = c("constraint_catalog", "constraint_schema", "constraint_name")) %>% + select(-table_schema, -table_name) %>% + left_join(sys_fkc_column_usage, by = c("constraint_catalog", "constraint_schema", "constraint_name", "column_name")) +} diff --git a/R/zzz.R b/R/zzz.R index d8d84f1a4..c7c198cd0 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -14,7 +14,7 @@ register_pkgdown_methods() #rigg(enum_pk_candidates_impl) - #rigg(build_copy_data) + #rigg(mssql_sys_all_db) } rigg <- function(fun) { From 1862cd5a76acae79df6f8ac5e7c475bbbf821bf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Fri, 30 Apr 2021 09:48:22 +0200 Subject: [PATCH 18/63] Global --- R/global.R | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/R/global.R b/R/global.R index cf8d1bf8b..bc3d6cfe5 100644 --- a/R/global.R +++ b/R/global.R @@ -92,5 +92,32 @@ utils::globalVariables(c( "districts", "loans", "orders", - "trans" + "trans", + # + # information_schema + "catalog", + "catalog_name", + "column_default", + "column_id", + "column_name", + "con", + "constraint_catalog", + 
"constraint_column_id", + "constraint_column_usage", + "constraint_name", + "constraint_schema", + "constraint_type", + "dbname", + "FIXME", + "is_nullable", + "key_column_usage", + "object_id", + "ordinal_position", + "schema_id", + "schemata", + "table_catalog", + "table_constraints", + "table_schema", + "table_type", + "tables" )) From 52ce1e924b1d6bd3d723b370f5fe1848d1c7cd52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Fri, 30 Apr 2021 09:48:27 +0200 Subject: [PATCH 19/63] WAT --- scratch/info.R | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/scratch/info.R b/scratch/info.R index 3b70d8b25..02da3dcae 100644 --- a/scratch/info.R +++ b/scratch/info.R @@ -27,18 +27,11 @@ if (FALSE) { src_pg <- test_src_postgres() con_pg <- src_pg$con -dm_meta(con) -mssql_constraint_column_usage(con, tbl_lc(con, dbplyr::ident_q("information_schema.table_constraints")), "test") - -asdf - dm_meta(con)$key_column_usage dm_meta(con_pg)$key_column_usage dm_meta(con)$constraint_column_usage dm_meta(con_pg)$constraint_column_usage -asdf - DBI::dbListTables(con, schema_name = "information_schema") obj <- DBI::dbListObjects(con) @@ -182,6 +175,9 @@ info_simple <- info_simple %>% dm_draw() +info_simple %>% + dm_get_tables() + key_dm <- info_simple %>% @@ -217,3 +213,8 @@ key_dm %>% key_dm %>% dm_get_tables() + +key_dm$fk %>% + select(-fq_constraint_name) + +# FIXME: Why is this missing? 
From 92403bbfca16ef266a05ee93de48728c37ef9e3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Fri, 30 Apr 2021 11:04:18 +0200 Subject: [PATCH 20/63] Bugfix --- R/mssql.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/mssql.R b/R/mssql.R index b4fd70509..0521dc205 100644 --- a/R/mssql.R +++ b/R/mssql.R @@ -64,14 +64,16 @@ mssql_constraint_column_usage <- function(con, table_constraints, dbname) { sys_fkc_column_usage <- fkc %>% - left_join(columns, by = c("catalog", "parent_object_id" = "object_id", "parent_column_id" = "column_id")) %>% + left_join(columns, by = c("catalog", "referenced_object_id" = "object_id", "referenced_column_id" = "column_id")) %>% left_join(tables, by = c("catalog", "referenced_object_id" = "object_id")) %>% left_join(schemas, by = c("catalog", "schema_id")) %>% left_join(objects, by = c("constraint_object_id" = "object_id")) %>% + # table_schema is used twice transmute(constraint_catalog = catalog, constraint_schema = table_schema, constraint_name, table_schema, table_name, column_name, ordinal_position = constraint_column_id) tbl_lc(con, dbplyr::ident_q("information_schema.constraint_column_usage")) %>% semi_join(info_fkc, by = c("constraint_catalog", "constraint_schema", "constraint_name")) %>% - select(-table_schema, -table_name) %>% - left_join(sys_fkc_column_usage, by = c("constraint_catalog", "constraint_schema", "constraint_name", "column_name")) + select(-table_schema, -table_name, -column_name) %>% + distinct() %>% + left_join(sys_fkc_column_usage, by = c("constraint_catalog", "constraint_schema", "constraint_name")) } From 0a8b96f25076155af653446e3069cc72e8662dfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Fri, 30 Apr 2021 11:37:47 +0200 Subject: [PATCH 21/63] Fix PK --- R/learn.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/learn.R b/R/learn.R index 17eb91222..4804ba276 100644 --- a/R/learn.R +++ b/R/learn.R @@ -113,7 +113,7 @@ 
dm_meta_raw <- function(con, catalog) { dm_add_fk(key_column_usage, c(constraint_catalog, constraint_schema, constraint_name), table_constraints) %>% # not on mariadb; - dm_add_pk(constraint_column_usage, c(table_catalog, table_schema, table_name, column_name, ordinal_position)) %>% + dm_add_pk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name, ordinal_position)) %>% dm_add_fk(constraint_column_usage, c(table_catalog, table_schema, table_name, column_name), columns) %>% dm_add_fk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name), table_constraints) %>% dm_add_fk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name, ordinal_position), key_column_usage) %>% From 271f1a3c02881344f5d602b1bab5c5b5b3ebbf4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Fri, 30 Apr 2021 11:37:54 +0200 Subject: [PATCH 22/63] Examine --- scratch/info.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scratch/info.R b/scratch/info.R index 02da3dcae..ea0fffd45 100644 --- a/scratch/info.R +++ b/scratch/info.R @@ -48,6 +48,9 @@ info %>% info %>% dm_get_tables() +info %>% + dm_examine_constraints() + info_local <- info %>% collect() From fa5176781abd1de70258fd22e8e6c4795f762000 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Fri, 30 Apr 2021 11:57:07 +0200 Subject: [PATCH 23/63] Bump --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2f7395afa..86705c252 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: dm Title: Relational Data Models -Version: 0.1.99.9002 +Version: 0.1.99.9003 Date: 2021-04-28 Authors@R: c(person(given = "Tobias", From c1d68a25ae7ff3d286e24df39f9ab88285a40aa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 2 May 2021 03:34:49 +0200 Subject: [PATCH 24/63] Example script --- scratch/kcu.sql | 7 +++++++ 1 file changed, 7 
insertions(+) create mode 100644 scratch/kcu.sql diff --git a/scratch/kcu.sql b/scratch/kcu.sql new file mode 100644 index 000000000..04242f65e --- /dev/null +++ b/scratch/kcu.sql @@ -0,0 +1,7 @@ +SELECT KCU1.CONSTRAINT_SCHEMA AS FK_CONSTRAINT_SCHEMA , + KCU1.CONSTRAINT_NAME AS FK_CONSTRAINT_NAME , KCU1.TABLE_SCHEMA AS FK_TABLE_SCHEMA , KCU1.TABLE_NAME AS FK_TABLE_NAME , KCU1.COLUMN_NAME AS FK_COLUMN_NAME , KCU1.ORDINAL_POSITION AS FK_ORDINAL_POSITION , KCU2.CONSTRAINT_SCHEMA AS REFERENCED_CONSTRAINT_SCHEMA , KCU2.CONSTRAINT_NAME AS REFERENCED_CONSTRAINT_NAME , KCU2.TABLE_SCHEMA AS REFERENCED_TABLE_SCHEMA , + KCU2.TABLE_NAME AS REFERENCED_TABLE_NAME , KCU2.COLUMN_NAME AS REFERENCED_COLUMN_NAME , KCU2.ORDINAL_POSITION AS REFERENCED_ORDINAL_POSITION +FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS AS RC + INNER JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS KCU1 ON KCU1.CONSTRAINT_CATALOG = RC.CONSTRAINT_CATALOG AND + KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME + INNER JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS KCU2 ON KCU2.CONSTRAINT_CATALOG = RC.UNIQUE_CONSTRAINT_CATALOG AND KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION From fe8cce8a8fc58605f288f25c3307beb6ea7c32ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 5 Jul 2021 05:30:26 +0200 Subject: [PATCH 25/63] Get con --- R/learn.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/learn.R b/R/learn.R index 31f8c382a..a6831589a 100644 --- a/R/learn.R +++ b/R/learn.R @@ -160,6 +160,7 @@ filter_dm_meta <- function(dm_meta, catalog = NULL, schema = NULL) { } dm_learn_from_db_legacy <- function(dest, dbname, ...) { + con <- dest$con sql <- db_learn_query(con, dbname = dbname, ...) 
if (is.null(sql)) { return() From e4dfaff875894efc88f251b296c4b5ffae70c068 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 5 Jul 2021 07:58:23 +0200 Subject: [PATCH 26/63] FIXME and formatting --- R/dm-from-src.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/dm-from-src.R b/R/dm-from-src.R index 54fe611e9..1b942662e 100644 --- a/R/dm-from-src.R +++ b/R/dm-from-src.R @@ -56,6 +56,8 @@ dm_from_src <- function(src = NULL, table_names = NULL, learn_keys = NULL, src <- src_from_src_or_con(src) con <- con_from_src_or_con(src) + # FIXME: Get rid of legacy method once it works for all + if (is.null(learn_keys) || isTRUE(learn_keys)) { dm_learned <- dm_learn_from_db(src, ...) @@ -117,7 +119,9 @@ dm_from_src <- function(src = NULL, table_names = NULL, learn_keys = NULL, } quote_ids <- function(x, con, schema = NULL) { - if (is.null(con)) return(x) + if (is.null(con)) { + return(x) + } if (is_null(schema)) { map( From e4a0c3cc35c4c681317fc3a49ef3d837e7312133 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 5 Jul 2021 07:59:57 +0200 Subject: [PATCH 27/63] Implement filter_dm_meta() --- R/learn.R | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/R/learn.R b/R/learn.R index a6831589a..1df0a3281 100644 --- a/R/learn.R +++ b/R/learn.R @@ -93,25 +93,25 @@ dm_meta_raw <- function(con, catalog) { dm_add_fk(tables, c(table_catalog, table_schema), schemata) %>% dm_add_pk(columns, c(table_catalog, table_schema, table_name, column_name)) %>% dm_add_fk(columns, c(table_catalog, table_schema, table_name), tables) %>% - #dm_add_fk(table_constraints, table_schema, schemata) %>% + # dm_add_fk(table_constraints, table_schema, schemata) %>% dm_add_pk(table_constraints, c(constraint_catalog, constraint_schema, constraint_name)) %>% dm_add_fk(table_constraints, c(table_catalog, table_schema, table_name), tables) %>% # constraint_schema vs. table_schema? 
# not on mssql: - #dm_add_fk(referential_constraints, c(constraint_schema, table_name), tables) %>% - #dm_add_fk(referential_constraints, c(constraint_schema, referenced_table_name), tables) %>% + # dm_add_fk(referential_constraints, c(constraint_schema, table_name), tables) %>% + # dm_add_fk(referential_constraints, c(constraint_schema, referenced_table_name), tables) %>% dm_add_pk(key_column_usage, c(constraint_catalog, constraint_schema, constraint_name, ordinal_position)) %>% dm_add_fk(key_column_usage, c(table_catalog, table_schema, table_name, column_name), columns) %>% dm_add_fk(key_column_usage, c(constraint_catalog, constraint_schema, constraint_name), table_constraints) %>% - + # # not on mariadb; dm_add_pk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name, ordinal_position)) %>% dm_add_fk(constraint_column_usage, c(table_catalog, table_schema, table_name, column_name), columns) %>% dm_add_fk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name), table_constraints) %>% dm_add_fk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name, ordinal_position), key_column_usage) %>% - + # dm_set_colors(brown = c(tables, columns), blue = schemata, green4 = ends_with("_constraints"), orange = ends_with("_usage")) } @@ -136,8 +136,17 @@ select_dm_meta <- function(dm_meta) { } filter_dm_meta <- function(dm_meta, catalog = NULL, schema = NULL) { + force(catalog) + force(schema) + + schemata <- dm_meta$schemata + tables <- dm_meta$tables + columns <- dm_meta$columns + table_constraints <- dm_meta$table_constraints + key_column_usage <- dm_meta$key_column_usage + constraint_column_usage <- dm_meta$constraint_column_usage + if (!is.null(catalog) && !is.na(catalog)) { - FIXME schemata <- schemata %>% filter(catalog_name %in% !!catalog) tables <- tables %>% filter(table_catalog %in% !!catalog) columns <- columns %>% filter(table_catalog %in% !!catalog) @@ -147,7 +156,6 @@ filter_dm_meta 
<- function(dm_meta, catalog = NULL, schema = NULL) { } if (!is.null(schema)) { - FIXME schemata <- schemata %>% filter(schema_name %in% !!schema) tables <- tables %>% filter(table_schema %in% !!schema) columns <- columns %>% filter(table_schema %in% !!schema) @@ -156,7 +164,14 @@ filter_dm_meta <- function(dm_meta, catalog = NULL, schema = NULL) { constraint_column_usage <- constraint_column_usage %>% filter(table_schema %in% !!schema) } - dm_meta + dm( + schemata, + tables, + columns, + table_constraints, + key_column_usage, + constraint_column_usage + ) } dm_learn_from_db_legacy <- function(dest, dbname, ...) { From 77c8c857dd718e306b8c598cd75334aa1747f7fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Tue, 12 Oct 2021 20:55:46 +0200 Subject: [PATCH 28/63] Tweaks --- R/learn.R | 32 ++++++++++++++++++++++++++------ R/mssql.R | 4 ++-- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/R/learn.R b/R/learn.R index 1df0a3281..140315af5 100644 --- a/R/learn.R +++ b/R/learn.R @@ -42,15 +42,29 @@ dm_learn_from_db <- function(dest, dbname = NULL, ...) { } #if (!is_mssql(con)) { - return(dm_learn_from_db_legacy(dest, dbname, ...)) + return(dm_learn_from_db_legacy(con, dbname, ...)) #} - dm_learn_from_db_meta(dest, catalog = dbname, ...) + dm_learn_from_db_meta(con, catalog = dbname, ...) 
} -dm_learn_from_db_meta <- function(dest, catalog = NULL, schema = NULL) { - info <- dm_meta(con, catalog = dbname, schema = schema) - info +dm_learn_from_db_meta <- function(con, catalog = NULL, schema = NULL) { + info <- dm_meta(con, catalog = catalog, schema = schema) + + table_info <- + info$columns %>% + select(catalog = table_catalog, schema = table_schema, table = table_name, column_name, ordinal_position) %>% + arrange(catalog, schema, table, ordinal_position) %>% + collect() %>% + arrange(catalog, schema, table) %>% + group_by(catalog, schema, table) %>% + summarize(vars = list(column_name)) %>% + ungroup() + + from <- pmap_chr(table_info[1:3], ~ DBI::dbQuoteIdentifier(con, DBI::Id(...))) + + tables <- map2(from, table_info$vars, ~ tbl(con, dbplyr::ident_q(.x), vars = .y)) + tables } dm_meta <- function(con, catalog = NA, schema = NULL) { @@ -88,6 +102,11 @@ dm_meta_raw <- function(con, catalog) { } dm(schemata, tables, columns, table_constraints, key_column_usage, constraint_column_usage) %>% + dm_meta_add_keys() +} + +dm_meta_add_keys <- function(dm_meta) { + dm_meta %>% dm_add_pk(schemata, c(catalog_name, schema_name)) %>% dm_add_pk(tables, c(table_catalog, table_schema, table_name)) %>% dm_add_fk(tables, c(table_catalog, table_schema), schemata) %>% @@ -171,7 +190,8 @@ filter_dm_meta <- function(dm_meta, catalog = NULL, schema = NULL) { table_constraints, key_column_usage, constraint_column_usage - ) + ) %>% + dm_meta_add_keys() } dm_learn_from_db_legacy <- function(dest, dbname, ...) 
{ diff --git a/R/mssql.R b/R/mssql.R index 0521dc205..7f3281fd4 100644 --- a/R/mssql.R +++ b/R/mssql.R @@ -24,7 +24,7 @@ mssql_sys_all_db <- function(con, dbname, name, warn = FALSE) { reduce(compact(lazy), union_all) } -mssql_sys_databases <- function(dbname) { +mssql_sys_databases <- function(con, dbname) { if (is.null(dbname)) { dbname <- tbl(con, dbplyr::ident_q("sys.databases")) %>% @@ -40,7 +40,7 @@ mssql_sys_databases <- function(dbname) { } mssql_constraint_column_usage <- function(con, table_constraints, dbname) { - dbname <- mssql_sys_databases(dbname) + dbname <- mssql_sys_databases(con, dbname) info_fkc <- table_constraints %>% From 27ae5814c9501f4624ea6b74549b43f935e8269b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Wed, 13 Oct 2021 16:32:40 +0200 Subject: [PATCH 29/63] name_format, get tables --- R/learn.R | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/R/learn.R b/R/learn.R index 140315af5..5ca92d325 100644 --- a/R/learn.R +++ b/R/learn.R @@ -48,22 +48,43 @@ dm_learn_from_db <- function(dest, dbname = NULL, ...) { dm_learn_from_db_meta(con, catalog = dbname, ...) 
} -dm_learn_from_db_meta <- function(con, catalog = NULL, schema = NULL) { +dm_learn_from_db_meta <- function(con, catalog = NULL, schema = NULL, name_format = "{table}") { info <- dm_meta(con, catalog = catalog, schema = schema) + df_info <- + info %>% + dm_select_tbl(-schemata) %>% + collect() + + dm_name <- + df_info$tables %>% + select(catalog = table_catalog, schema = table_schema, table = table_name) %>% + mutate(name = glue(name_format)) %>% + pull() %>% + vec_as_names(repair = "unique") + + from <- + df_info$tables %>% + select(catalog = table_catalog, schema = table_schema, table = table_name) %>% + pmap_chr(~ DBI::dbQuoteIdentifier(con, DBI::Id(...))) + + df_info <- + df_info %>% + dm_zoom_to(tables) %>% + mutate(dm_name = !!dm_name, from = !!from) %>% + dm_update_zoomed() + table_info <- - info$columns %>% - select(catalog = table_catalog, schema = table_schema, table = table_name, column_name, ordinal_position) %>% - arrange(catalog, schema, table, ordinal_position) %>% - collect() %>% - arrange(catalog, schema, table) %>% - group_by(catalog, schema, table) %>% + df_info %>% + dm_zoom_to(columns) %>% + left_join(tables) %>% + group_by(dm_name, from) %>% summarize(vars = list(column_name)) %>% - ungroup() - - from <- pmap_chr(table_info[1:3], ~ DBI::dbQuoteIdentifier(con, DBI::Id(...))) + ungroup() %>% + pull_tbl() - tables <- map2(from, table_info$vars, ~ tbl(con, dbplyr::ident_q(.x), vars = .y)) + tables <- map2(table_info$from, table_info$vars, ~ tbl(con, dbplyr::ident_q(.x), vars = .y)) + names(tables) <- table_info$dm_name tables } From fb715e3524d0b85d9218360c13690201d13abdad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Wed, 13 Oct 2021 17:02:24 +0200 Subject: [PATCH 30/63] pks --- R/learn.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/R/learn.R b/R/learn.R index 5ca92d325..9cf970494 100644 --- a/R/learn.R +++ b/R/learn.R @@ -86,6 +86,19 @@ dm_learn_from_db_meta <- function(con, catalog = NULL, schema = 
NULL, name_forma tables <- map2(table_info$from, table_info$vars, ~ tbl(con, dbplyr::ident_q(.x), vars = .y)) names(tables) <- table_info$dm_name tables + + pks <- + df_info %>% + dm_select(columns, -ordinal_position) %>% + dm_select_tbl(constraint_column_usage, key_column_usage, columns, tables) %>% + dm_zoom_to(key_column_usage) %>% + anti_join(constraint_column_usage) %>% + dm_update_zoomed() %>% + dm_squash_to_tbl(key_column_usage) %>% + select(constraint_catalog, constraint_schema, constraint_name, dm_name, column = column_name) %>% + nest(data = column) %>% + select(dm_name, data) %>% + deframe() } dm_meta <- function(con, catalog = NA, schema = NULL) { From 186f367437258edb0f63bb575b45f3cdd520b895 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Wed, 13 Oct 2021 19:00:58 +0200 Subject: [PATCH 31/63] Oops --- R/learn.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/learn.R b/R/learn.R index 57ac44bd5..e3c1a92d7 100644 --- a/R/learn.R +++ b/R/learn.R @@ -228,8 +228,7 @@ filter_dm_meta <- function(dm_meta, catalog = NULL, schema = NULL) { dm_meta_add_keys() } -dm_learn_from_db_legacy <- function(dest, dbname, ...) { - con <- dest$con +dm_learn_from_db_legacy <- function(con, dbname, ...) { sql <- db_learn_query(con, dbname = dbname, ...) if (is.null(sql)) { return() From 4d5e8bc2398ad03973eef1eb2a94cd3f1dc69414 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Wed, 13 Oct 2021 20:18:38 +0200 Subject: [PATCH 32/63] MSSQL learns from dm_meta() --- R/learn.R | 73 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 61 insertions(+), 12 deletions(-) diff --git a/R/learn.R b/R/learn.R index e3c1a92d7..23d8e1101 100644 --- a/R/learn.R +++ b/R/learn.R @@ -41,9 +41,9 @@ dm_learn_from_db <- function(dest, dbname = NULL, ...) 
{ return() } - #if (!is_mssql(con)) { + if (!is_mssql(con)) { return(dm_learn_from_db_legacy(con, dbname, ...)) - #} + } dm_learn_from_db_meta(con, catalog = dbname, ...) } @@ -68,16 +68,21 @@ dm_learn_from_db_meta <- function(con, catalog = NULL, schema = NULL, name_forma select(catalog = table_catalog, schema = table_schema, table = table_name) %>% pmap_chr(~ DBI::dbQuoteIdentifier(con, DBI::Id(...))) - df_info <- + df_key_info <- df_info %>% dm_zoom_to(tables) %>% mutate(dm_name = !!dm_name, from = !!from) %>% - dm_update_zoomed() + dm_update_zoomed() %>% + dm_zoom_to(columns) %>% + arrange(ordinal_position) %>% + select(-ordinal_position) %>% + left_join(tables) %>% + dm_update_zoomed() %>% + dm_select_tbl(constraint_column_usage, key_column_usage, columns) table_info <- - df_info %>% + df_key_info %>% dm_zoom_to(columns) %>% - left_join(tables) %>% group_by(dm_name, from) %>% summarize(vars = list(column_name)) %>% ungroup() %>% @@ -85,20 +90,64 @@ dm_learn_from_db_meta <- function(con, catalog = NULL, schema = NULL, name_forma tables <- map2(table_info$from, table_info$vars, ~ tbl(con, dbplyr::ident_q(.x), vars = .y)) names(tables) <- table_info$dm_name - tables pks <- - df_info %>% - dm_select(columns, -ordinal_position) %>% - dm_select_tbl(constraint_column_usage, key_column_usage, columns, tables) %>% + df_key_info %>% dm_zoom_to(key_column_usage) %>% anti_join(constraint_column_usage) %>% + arrange(ordinal_position) %>% dm_update_zoomed() %>% dm_squash_to_tbl(key_column_usage) %>% - select(constraint_catalog, constraint_schema, constraint_name, dm_name, column = column_name) %>% - nest(data = column) %>% + select(constraint_catalog, constraint_schema, constraint_name, dm_name, column_name) %>% + group_by(constraint_catalog, constraint_schema, constraint_name, dm_name) %>% + summarize(data = list(tibble(column = list(column_name)))) %>% + ungroup() %>% select(dm_name, data) %>% deframe() + + fks <- + df_key_info %>% + dm_zoom_to(key_column_usage) %>% + 
left_join(columns, select = c(column_name, dm_name, table_catalog, table_schema, table_name)) %>% + dm_update_zoomed() %>% + dm_zoom_to(constraint_column_usage) %>% + left_join(columns, select = c(column_name, dm_name, table_catalog, table_schema, table_name)) %>% + dm_update_zoomed() %>% + dm_select_tbl(-columns) %>% + dm_disambiguate_cols(quiet = TRUE) %>% + dm_flatten_to_tbl(constraint_column_usage) %>% + select( + constraint_catalog, + constraint_schema, + constraint_name, + ordinal_position, + ref_table = constraint_column_usage.dm_name, + ref_column = constraint_column_usage.column_name, + table = key_column_usage.dm_name, + column = key_column_usage.column_name, + ) %>% + arrange( + constraint_catalog, + constraint_schema, + constraint_name, + ordinal_position, + ) %>% + select(-ordinal_position) %>% + # FIXME: Where to learn this in INFORMATION_SCHEMA? + group_by( + constraint_catalog, + constraint_schema, + constraint_name, + ref_table, + ) %>% + summarize(data = list(tibble( + ref_column = list(ref_column), table = table[[1]], column = list(column), on_delete = "no_action" + ))) %>% + ungroup() %>% + select(-(1:3)) %>% + deframe() + + new_dm2(tables, pks, fks) } dm_meta <- function(con, catalog = NA, schema = NULL) { From 42e8e1a028062c131cd25cf7fc56ca7e6679bb9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Wed, 13 Oct 2021 21:25:02 +0200 Subject: [PATCH 33/63] Globals --- R/global.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/global.R b/R/global.R index 9869daa00..eba20e5f5 100644 --- a/R/global.R +++ b/R/global.R @@ -125,5 +125,10 @@ utils::globalVariables(c( "table_constraints", "table_schema", "table_type", - "tables" + "tables", + "constraint_column_usage.column_name", + "constraint_column_usage.dm_name", + "key_column_usage.column_name", + "key_column_usage.dm_name", + NULL )) From a58d96c97cf7018424d1ef9365e425c9f1ee640c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 
3 Jan 2022 07:05:39 +0100 Subject: [PATCH 34/63] Work around trailing comma problem --- R/global.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/global.R b/R/global.R index 16123104a..223b04e1d 100644 --- a/R/global.R +++ b/R/global.R @@ -119,5 +119,8 @@ utils::globalVariables(c( "index_name", "remote_table", "remote_table_unquoted", - "unique_def" + "unique_def", + # + # keep this to avoid dealing with trailing commas + NULL )) From 9411f8ffded20d86870e2dcc647e211d60f83822 Mon Sep 17 00:00:00 2001 From: krlmlr Date: Wed, 9 Mar 2022 15:49:38 +0000 Subject: [PATCH 35/63] Auto-update from GitHub Actions Run: https://github.com/cynkra/dm/actions/runs/1958221284 --- R/mssql.R | 4 ++-- R/zzz.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/mssql.R b/R/mssql.R index 7f3281fd4..12f546878 100644 --- a/R/mssql.R +++ b/R/mssql.R @@ -12,7 +12,7 @@ mssql_sys_db <- function(con, dbname, name) { } mssql_sys_all_db <- function(con, dbname, name, warn = FALSE) { - lazy <- map(dbname, ~tryCatch( + lazy <- map(dbname, ~ tryCatch( mssql_sys_db(con, .x, name), error = function(e) { if (warn) { @@ -64,7 +64,7 @@ mssql_constraint_column_usage <- function(con, table_constraints, dbname) { sys_fkc_column_usage <- fkc %>% - left_join(columns, by = c("catalog", "referenced_object_id" = "object_id", "referenced_column_id" = "column_id")) %>% + left_join(columns, by = c("catalog", "referenced_object_id" = "object_id", "referenced_column_id" = "column_id")) %>% left_join(tables, by = c("catalog", "referenced_object_id" = "object_id")) %>% left_join(schemas, by = c("catalog", "schema_id")) %>% left_join(objects, by = c("constraint_object_id" = "object_id")) %>% diff --git a/R/zzz.R b/R/zzz.R index e478a16a4..36bb6c521 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -17,7 +17,7 @@ check_version_on_load("RSQLite", "2.2.8", "to use the {.code returning} argument in {.code dm::rows_*()}") # rigg(enum_pk_candidates_impl) - #rigg(mssql_sys_all_db) + # 
rigg(mssql_sys_all_db) # rigg(build_copy_data) # rigg(dm_insert_zoomed_outgoing_fks) # rigg(dm_upgrade) From 739f857b9d00d03ad6e9a70eb56bb4082ad7ccde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 14 Mar 2022 18:01:47 +0100 Subject: [PATCH 36/63] Manual rename --- R/learn.R | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/R/learn.R b/R/learn.R index 23d8e1101..14020719e 100644 --- a/R/learn.R +++ b/R/learn.R @@ -114,7 +114,16 @@ dm_learn_from_db_meta <- function(con, catalog = NULL, schema = NULL, name_forma left_join(columns, select = c(column_name, dm_name, table_catalog, table_schema, table_name)) %>% dm_update_zoomed() %>% dm_select_tbl(-columns) %>% - dm_disambiguate_cols(quiet = TRUE) %>% + dm_rename(constraint_column_usage, constraint_column_usage.table_catalog = table_catalog) %>% + dm_rename(constraint_column_usage, constraint_column_usage.table_schema = table_schema) %>% + dm_rename(constraint_column_usage, constraint_column_usage.table_name = table_name) %>% + dm_rename(constraint_column_usage, constraint_column_usage.column_name = column_name) %>% + dm_rename(constraint_column_usage, constraint_column_usage.dm_name = dm_name) %>% + dm_rename(key_column_usage, key_column_usage.table_catalog = table_catalog) %>% + dm_rename(key_column_usage, key_column_usage.table_schema = table_schema) %>% + dm_rename(key_column_usage, key_column_usage.table_name = table_name) %>% + dm_rename(key_column_usage, key_column_usage.column_name = column_name) %>% + dm_rename(key_column_usage, key_column_usage.dm_name = dm_name) %>% dm_flatten_to_tbl(constraint_column_usage) %>% select( constraint_catalog, From f7f3b5183f23b34d08a4658563655282ec10c286 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 14 Mar 2022 18:01:53 +0100 Subject: [PATCH 37/63] Fix corner case --- R/learn.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/learn.R b/R/learn.R index 14020719e..62f9d76f5 
100644 --- a/R/learn.R +++ b/R/learn.R @@ -150,7 +150,10 @@ dm_learn_from_db_meta <- function(con, catalog = NULL, schema = NULL, name_forma ref_table, ) %>% summarize(data = list(tibble( - ref_column = list(ref_column), table = table[[1]], column = list(column), on_delete = "no_action" + ref_column = list(ref_column), + table = if (length(table) > 0) table[[1]] else NA_character_, + column = list(column), + on_delete = "no_action" ))) %>% ungroup() %>% select(-(1:3)) %>% From 3830f870a699c3171a67c785a44eedf9c318428d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 14 Mar 2022 18:02:11 +0100 Subject: [PATCH 38/63] Temporarily switch to database to be learned --- R/learn.R | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/R/learn.R b/R/learn.R index 62f9d76f5..ce43d00e4 100644 --- a/R/learn.R +++ b/R/learn.R @@ -163,10 +163,29 @@ dm_learn_from_db_meta <- function(con, catalog = NULL, schema = NULL, name_forma } dm_meta <- function(con, catalog = NA, schema = NULL) { + if (is_mssql(con)) { + if (is.null(catalog)) { + # FIXME: Classed error message? 
+ abort("SQL server only supports learning from one database.") + } + + if (!is.na(catalog)) { + message("Temporarily switching to database ", tick(catalog), ".") + old_dbname <- dbGetQuery(con, "SELECT DB_NAME()")[[1]] + sql <- paste0("USE ", dbQuoteIdentifier(con, catalog)) + old_sql <- paste0("USE ", dbQuoteIdentifier(con, old_dbname)) + dbExecute(con, sql, immediate = TRUE) + withr::defer({ + dbExecute(con, old_sql, immediate = TRUE) + }) + } + } + con %>% dm_meta_raw(catalog) %>% select_dm_meta() %>% - filter_dm_meta(catalog, schema) + filter_dm_meta(catalog, schema) %>% + collect() } dm_meta_raw <- function(con, catalog) { From fe545f32dfd722e9510728f4faee0769811c6088 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 14 Mar 2022 18:32:27 +0100 Subject: [PATCH 39/63] Fix new_dm2() for the case of more than one foreign key to the same table --- R/dm.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/dm.R b/R/dm.R index 825699c38..8abad2b8d 100644 --- a/R/dm.R +++ b/R/dm.R @@ -109,7 +109,11 @@ new_dm2 <- function(tables = list(), pks_df <- enframe(pks, "table", "pks") - fks_df <- enframe(fks, "table", "fks") + fks_df <- + enframe(fks, "table", "fks") %>% + group_by(table) %>% + summarize(fks = list(bind_rows(fks))) %>% + ungroup() filters <- tibble( From a755b1698d0d52fa337c2d145779fc1ceb43d8ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 14 Mar 2022 18:32:47 +0100 Subject: [PATCH 40/63] Fix dbname default -- NA means current db --- R/learn.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/learn.R b/R/learn.R index ce43d00e4..3af4a7f40 100644 --- a/R/learn.R +++ b/R/learn.R @@ -32,7 +32,7 @@ #' # the `dm` from the SQLite DB #' iris_dm_learned <- dm_learn_from_db(src_sqlite) #' } -dm_learn_from_db <- function(dest, dbname = NULL, ...) 
{ # assuming that we will not try to learn from (globally) temporary tables, which do not appear in sys.table con <- con_from_src_or_con(dest) src <- src_from_src_or_con(dest) From a1e8cc33519a5b998990a01796d535d0c094efc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 14 Mar 2022 18:32:58 +0100 Subject: [PATCH 41/63] Fix test --- tests/testthat/test-learn.R | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/tests/testthat/test-learn.R b/tests/testthat/test-learn.R index 106c90711..1f82ce3bd 100644 --- a/tests/testthat/test-learn.R +++ b/tests/testthat/test-learn.R @@ -13,9 +13,6 @@ schema_name <- random_schema() test_that("Standard learning from MSSQL (schema 'dbo') or Postgres (schema 'public') and get_src_tbl_names() works?", { skip_if_src_not(c("mssql", "postgres")) - # FIXME: COMPOUND: Need to fix implementation - skip_if_remote_src() - # dm_learn_from_mssql() -------------------------------------------------- src_db <- my_test_src() @@ -34,7 +31,12 @@ test_that("Standard learning from MSSQL (schema 'dbo') or Postgres (schema 'publ map_chr( dm_get_tables(dm_for_filter_copied)[order_of_deletion], dbplyr::remote_name - ) + ) %>% + SQL() %>% + DBI::dbUnquoteIdentifier(conn = src_db$con) %>% + map_chr(~ .x@name[["table"]]) + + remote_tbl_map <- set_names(remote_tbl_names, gsub("^(tf_.).*$", "\\1", remote_tbl_names)) # test 'get_src_tbl_names()' src_tbl_names <- sort(unname(gsub("^.*\\.", "", get_src_tbl_names(src_db)))) @@ -44,20 +46,22 @@ test_that("Standard learning from MSSQL (schema 'dbo') or Postgres (schema 'publ sort(dbQuoteIdentifier(src_db$con, remote_tbl_names)) ) - dm_db_learned_all <- expect_message(dm_from_src(src_db)) + expect_message(dm_db_learned_all <- dm_from_src(src_db)) - # in case there happen to be other tables in schema "dbo" or "public" + # Select and fix table names dm_db_learned <- dm_db_learned_all %>% - dm_select_tbl(!!!remote_tbl_names) + 
dm_select_tbl(!!!remote_tbl_map) expect_equivalent_dm( dm_db_learned, - dm_for_filter()[order_of_deletion] + dm_for_filter()[order_of_deletion], + # FIXME: Enable fetching of on_delete information + ignore_on_delete = TRUE ) # learning without keys: - dm_db_learned_no_keys <- expect_silent(dm_from_src(src_db, learn_keys = FALSE)) + expect_silent(dm_db_learned_no_keys <- dm_from_src(src_db, learn_keys = FALSE)) # for learning from DB without learning the key relations dm_for_filter_no_keys <- @@ -69,10 +73,10 @@ test_that("Standard learning from MSSQL (schema 'dbo') or Postgres (schema 'publ ) %>% new_dm3() - # in case there happen to be other tables in schema "dbo" or "public" + # Select and fix table names dm_db_learned_no_keys <- dm_db_learned_no_keys %>% - dm_select_tbl(!!!remote_tbl_names) + dm_select_tbl(!!!remote_tbl_map) expect_equivalent_dm( dm_db_learned_no_keys, From 974885a003e910d30381831afc1cbeec41baecc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 14 Mar 2022 19:57:17 +0100 Subject: [PATCH 42/63] Unrelated: fix warning --- tests/testthat/test-rows-dm.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-rows-dm.R b/tests/testthat/test-rows-dm.R index 027fa22c5..26231caac 100644 --- a/tests/testthat/test-rows-dm.R +++ b/tests/testthat/test-rows-dm.R @@ -9,7 +9,7 @@ test_that("dm_rows_insert()", { skip_if_not_installed("nycflights13") skip_if_not_installed("RSQLite") - scoped_options(lifecycle_verbosity = "quiet") + local_options(lifecycle_verbosity = "quiet") expect_snapshot({ # Entire dataset with all dimension tables populated From cdd4870fc28e2f4c565dac8c34fd0fe3ee82255b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 14 Mar 2022 19:58:57 +0100 Subject: [PATCH 43/63] Fix Postgres error --- R/learn.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/learn.R b/R/learn.R index 3af4a7f40..d23bc3726 100644 --- a/R/learn.R +++ b/R/learn.R @@ -342,7 
+342,7 @@ dm_learn_from_db_legacy <- function(con, dbname, ...) { schema_if <- function(schema, table, con, dbname = NULL) { table_sql <- DBI::dbQuoteIdentifier(con, table) - if (is_null(dbname) || dbname == "") { + if (is_null(dbname) || is.na(dbname) || dbname == "") { if_else( are_na(schema), table_sql, From c4f94b96defc1c69ba776873846b4ce67719faa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 14 Mar 2022 21:48:32 +0100 Subject: [PATCH 44/63] Skip test on Postgres for now --- tests/testthat/test-learn.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/testthat/test-learn.R b/tests/testthat/test-learn.R index 1f82ce3bd..207f35e0e 100644 --- a/tests/testthat/test-learn.R +++ b/tests/testthat/test-learn.R @@ -13,6 +13,9 @@ schema_name <- random_schema() test_that("Standard learning from MSSQL (schema 'dbo') or Postgres (schema 'public') and get_src_tbl_names() works?", { skip_if_src_not(c("mssql", "postgres")) + # FIXME: Enable when fixed + skip_if_src("postgres") + # dm_learn_from_mssql() -------------------------------------------------- src_db <- my_test_src() From 9d6e72f8035e95d0bbdc13ae742453433884c6fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 14 Mar 2022 21:48:46 +0100 Subject: [PATCH 45/63] Fix test --- tests/testthat/test-learn.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-learn.R b/tests/testthat/test-learn.R index 207f35e0e..0a5a4b146 100644 --- a/tests/testthat/test-learn.R +++ b/tests/testthat/test-learn.R @@ -91,9 +91,6 @@ test_that("Standard learning from MSSQL (schema 'dbo') or Postgres (schema 'publ test_that("Learning from specific schema on MSSQL or Postgres works?", { skip_if_src_not(c("mssql", "postgres")) - # FIXME: COMPOUND: Need to fix implementation - skip_if_remote_src() - src_db <- my_test_src() con_db <- src_db$con @@ -105,10 +102,13 @@ test_that("Learning from specific schema on MSSQL or Postgres works?", { 
src_db, dm_for_disambiguate(), temporary = FALSE, - table_names = ~ DBI::SQL(paste0(schema_name_q, ".", .x)) + schema = schema_name ) order_of_deletion <- c("iris_3", "iris_2", "iris_1") - remote_tbl_names <- set_names(paste0(schema_name_q, ".\"", order_of_deletion, "\""), order_of_deletion) + remote_tbl_names <- set_names( + paste0(schema_name_q, ".\"", order_of_deletion, "\""), + order_of_deletion + ) withr::defer({ walk( From 81f7f5a1ddefbe46180e3c541543e3d21299966c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 14 Mar 2022 21:50:56 +0100 Subject: [PATCH 46/63] Simplify --- R/mssql.R | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/R/mssql.R b/R/mssql.R index 12f546878..4e2d3f04c 100644 --- a/R/mssql.R +++ b/R/mssql.R @@ -24,24 +24,7 @@ mssql_sys_all_db <- function(con, dbname, name, warn = FALSE) { reduce(compact(lazy), union_all) } -mssql_sys_databases <- function(con, dbname) { - if (is.null(dbname)) { - dbname <- - tbl(con, dbplyr::ident_q("sys.databases")) %>% - select(name) %>% - collect() %>% - pull() - } else if (is.na(dbname)) { - dbname <- NA_character_ - } else { - stopifnot(is.character(dbname)) - } - dbname -} - mssql_constraint_column_usage <- function(con, table_constraints, dbname) { - dbname <- mssql_sys_databases(con, dbname) - info_fkc <- table_constraints %>% select(constraint_catalog, constraint_schema, constraint_name, constraint_type) %>% From 270918d44ae286bde55e976093ab453626f1bfeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 14 Mar 2022 21:52:19 +0100 Subject: [PATCH 47/63] Remove dead code --- R/learn.R | 52 ---------------------------------------------------- 1 file changed, 52 deletions(-) diff --git a/R/learn.R b/R/learn.R index d23bc3726..0394b08d5 100644 --- a/R/learn.R +++ b/R/learn.R @@ -359,63 +359,11 @@ schema_if <- function(schema, table, con, dbname = NULL) { } db_learn_query <- function(dest, dbname, ...) 
{ - if (is_mssql(dest)) { - return(mssql_learn_query(dest, dbname = dbname, ...)) - } if (is_postgres(dest)) { return(postgres_learn_query(dest, ...)) } } -mssql_learn_query <- function(con, schema = "dbo", dbname = NULL) { # taken directly from {datamodelr} and subsequently tweaked a little - dbname_sql <- if (is_null(dbname)) { - "" - } else { - paste0(DBI::dbQuoteIdentifier(con, dbname), ".") - } - glue::glue( - "select - schemas.name as [schema], - tabs.name as [table], - cols.name as [column], - isnull(ind_col.column_id, 0) as [key], - ref_tabs.name AS ref, - ref_cols.name AS ref_col, - 1 - cols.is_nullable as mandatory, - types.name as [type], - cols.max_length, - cols.precision, - cols.scale - from - {dbname_sql}sys.all_columns cols - inner join {dbname_sql}sys.tables tabs on - cols.object_id = tabs.object_id - inner join {dbname_sql}sys.schemas schemas on - tabs.schema_id = schemas.schema_id - left outer join {dbname_sql}sys.foreign_key_columns ref on - ref.parent_object_id = tabs.object_id - and ref.parent_column_id = cols.column_id - left outer join {dbname_sql}sys.indexes ind on - ind.object_id = tabs.object_id - and ind.is_primary_key = 1 - left outer join {dbname_sql}sys.index_columns ind_col on - ind_col.object_id = ind.object_id - and ind_col.index_id = ind.index_id - and ind_col.column_id = cols.column_id - left outer join {dbname_sql}sys.systypes [types] on - types.xusertype = cols.system_type_id - left outer join {dbname_sql}sys.tables ref_tabs on - ref_tabs.object_id = ref.referenced_object_id - left outer join {dbname_sql}sys.all_columns ref_cols on - ref_cols.object_id = ref.referenced_object_id - and ref_cols.column_id = ref.referenced_column_id - where schemas.name = {DBI::dbQuoteString(con, schema)} - order by - tabs.create_date, - cols.column_id" - ) -} - postgres_learn_query <- function(con, schema = "public", table_type = "BASE TABLE") { sprintf( "SELECT From bae9f74e4c00761a9f0bb56d2a38b7d88d518d04 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 14 Mar 2022 22:16:51 +0100 Subject: [PATCH 48/63] Simplify new_dm2() --- R/dm.R | 16 ++++------------ R/learn.R | 17 +++++++++-------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/R/dm.R b/R/dm.R index 8abad2b8d..c65a44de1 100644 --- a/R/dm.R +++ b/R/dm.R @@ -94,27 +94,19 @@ new_dm <- function(tables = list()) { } new_dm2 <- function(tables = list(), - pks = structure(list(), names = character()), - fks = structure(list(), names = character()), + pks_df = tibble(table = character(), pks = list()), + fks_df = tibble(table = character(), fks = list()), validate = TRUE) { # Legacy data <- unname(tables) table <- names2(tables) - stopifnot(!is.null(names(pks)), all(names(pks) %in% table)) - stopifnot(!is.null(names(fks)), all(names(fks) %in% table)) + stopifnot(all(pks_df$table %in% table)) + stopifnot(all(fks_df$table %in% table)) zoom <- new_zoom() col_tracker_zoom <- new_col_tracker_zoom() - pks_df <- enframe(pks, "table", "pks") - - fks_df <- - enframe(fks, "table", "fks") %>% - group_by(table) %>% - summarize(fks = list(bind_rows(fks))) %>% - ungroup() - filters <- tibble( table = table, diff --git a/R/learn.R b/R/learn.R index 0394b08d5..3878ed749 100644 --- a/R/learn.R +++ b/R/learn.R @@ -91,7 +91,7 @@ dm_learn_from_db_meta <- function(con, catalog = NULL, schema = NULL, name_forma tables <- map2(table_info$from, table_info$vars, ~ tbl(con, dbplyr::ident_q(.x), vars = .y)) names(tables) <- table_info$dm_name - pks <- + pks_df <- df_key_info %>% dm_zoom_to(key_column_usage) %>% anti_join(constraint_column_usage) %>% @@ -100,12 +100,11 @@ dm_learn_from_db_meta <- function(con, catalog = NULL, schema = NULL, name_forma dm_squash_to_tbl(key_column_usage) %>% select(constraint_catalog, constraint_schema, constraint_name, dm_name, column_name) %>% group_by(constraint_catalog, constraint_schema, constraint_name, dm_name) %>% - summarize(data = list(tibble(column = list(column_name)))) %>% + summarize(pks = 
list(tibble(column = list(column_name)))) %>% ungroup() %>% - select(dm_name, data) %>% - deframe() + select(table = dm_name, pks) - fks <- + fks_df <- df_key_info %>% dm_zoom_to(key_column_usage) %>% left_join(columns, select = c(column_name, dm_name, table_catalog, table_schema, table_name)) %>% @@ -149,7 +148,7 @@ dm_learn_from_db_meta <- function(con, catalog = NULL, schema = NULL, name_forma constraint_name, ref_table, ) %>% - summarize(data = list(tibble( + summarize(fks = list(tibble( ref_column = list(ref_column), table = if (length(table) > 0) table[[1]] else NA_character_, column = list(column), @@ -157,9 +156,11 @@ dm_learn_from_db_meta <- function(con, catalog = NULL, schema = NULL, name_forma ))) %>% ungroup() %>% select(-(1:3)) %>% - deframe() + group_by(table = ref_table) %>% + summarize(fks = list(bind_rows(fks))) %>% + ungroup() - new_dm2(tables, pks, fks) + new_dm2(tables, pks_df, fks_df) } dm_meta <- function(con, catalog = NA, schema = NULL) { From bf4098a7ba5ff7fb1156e0a434e6a50a799e9fc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 14 Mar 2022 22:19:01 +0100 Subject: [PATCH 49/63] Remove unused --- scratch/kcu.sql | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 scratch/kcu.sql diff --git a/scratch/kcu.sql b/scratch/kcu.sql deleted file mode 100644 index 04242f65e..000000000 --- a/scratch/kcu.sql +++ /dev/null @@ -1,7 +0,0 @@ -SELECT KCU1.CONSTRAINT_SCHEMA AS FK_CONSTRAINT_SCHEMA , - KCU1.CONSTRAINT_NAME AS FK_CONSTRAINT_NAME , KCU1.TABLE_SCHEMA AS FK_TABLE_SCHEMA , KCU1.TABLE_NAME AS FK_TABLE_NAME , KCU1.COLUMN_NAME AS FK_COLUMN_NAME , KCU1.ORDINAL_POSITION AS FK_ORDINAL_POSITION , KCU2.CONSTRAINT_SCHEMA AS REFERENCED_CONSTRAINT_SCHEMA , KCU2.CONSTRAINT_NAME AS REFERENCED_CONSTRAINT_NAME , KCU2.TABLE_SCHEMA AS REFERENCED_TABLE_SCHEMA , - KCU2.TABLE_NAME AS REFERENCED_TABLE_NAME , KCU2.COLUMN_NAME AS REFERENCED_COLUMN_NAME , KCU2.ORDINAL_POSITION AS REFERENCED_ORDINAL_POSITION -FROM 
INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS AS RC - INNER JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS KCU1 ON KCU1.CONSTRAINT_CATALOG = RC.CONSTRAINT_CATALOG AND - KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME - INNER JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS KCU2 ON KCU2.CONSTRAINT_CATALOG = RC.UNIQUE_CONSTRAINT_CATALOG AND KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION From 60d1f24ed254fe5be5142fafb16a4b541eeb50cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 14 Mar 2022 22:19:56 +0100 Subject: [PATCH 50/63] Move code --- R/learn.R | 146 ------------------------------------------------------ R/meta.R | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+), 146 deletions(-) create mode 100644 R/meta.R diff --git a/R/learn.R b/R/learn.R index 3878ed749..9b0bbec0a 100644 --- a/R/learn.R +++ b/R/learn.R @@ -163,152 +163,6 @@ dm_learn_from_db_meta <- function(con, catalog = NULL, schema = NULL, name_forma new_dm2(tables, pks_df, fks_df) } -dm_meta <- function(con, catalog = NA, schema = NULL) { - if (is_mssql(con)) { - if (is.null(catalog)) { - # FIXME: Classed error message? 
- abort("SQL server only supports learning from one database.") - } - - if (!is.na(catalog)) { - message("Temporarily switching to database ", tick(catalog), ".") - old_dbname <- dbGetQuery(con, "SELECT DB_NAME()")[[1]] - sql <- paste0("USE ", dbQuoteIdentifier(con, catalog)) - old_sql <- paste0("USE ", dbQuoteIdentifier(con, old_dbname)) - dbExecute(con, sql, immediate = TRUE) - withr::defer({ - dbExecute(con, old_sql, immediate = TRUE) - }) - } - } - - con %>% - dm_meta_raw(catalog) %>% - select_dm_meta() %>% - filter_dm_meta(catalog, schema) %>% - collect() -} - -dm_meta_raw <- function(con, catalog) { - src <- src_from_src_or_con(con) - - schemata <- tbl_lc(src, dbplyr::ident_q("information_schema.schemata")) - tables <- tbl_lc(src, dbplyr::ident_q("information_schema.tables")) - columns <- tbl_lc(src, dbplyr::ident_q("information_schema.columns")) - table_constraints <- tbl_lc(src, dbplyr::ident_q("information_schema.table_constraints")) - key_column_usage <- tbl_lc(src, dbplyr::ident_q("information_schema.key_column_usage")) - - if (is_postgres(src)) { - info_fkc <- - table_constraints %>% - select(constraint_catalog, constraint_schema, constraint_name, constraint_type) %>% - filter(constraint_type == "FOREIGN KEY") - - constraint_column_usage <- - tbl_lc(src, dbplyr::ident_q("information_schema.constraint_column_usage")) %>% - group_by(constraint_catalog, constraint_schema, constraint_name) %>% - mutate(ordinal_position = row_number()) %>% - ungroup() %>% - semi_join(info_fkc, by = c("constraint_catalog", "constraint_schema", "constraint_name")) - - # FIXME: Also has `position_in_unique_constraint`, used elsewhere? 
- } else if (is_mssql(src)) { - constraint_column_usage <- mssql_constraint_column_usage(src, table_constraints, catalog) - } - - dm(schemata, tables, columns, table_constraints, key_column_usage, constraint_column_usage) %>% - dm_meta_add_keys() -} - -dm_meta_add_keys <- function(dm_meta) { - dm_meta %>% - dm_add_pk(schemata, c(catalog_name, schema_name)) %>% - dm_add_pk(tables, c(table_catalog, table_schema, table_name)) %>% - dm_add_fk(tables, c(table_catalog, table_schema), schemata) %>% - dm_add_pk(columns, c(table_catalog, table_schema, table_name, column_name)) %>% - dm_add_fk(columns, c(table_catalog, table_schema, table_name), tables) %>% - # dm_add_fk(table_constraints, table_schema, schemata) %>% - dm_add_pk(table_constraints, c(constraint_catalog, constraint_schema, constraint_name)) %>% - dm_add_fk(table_constraints, c(table_catalog, table_schema, table_name), tables) %>% - # constraint_schema vs. table_schema? - - # not on mssql: - # dm_add_fk(referential_constraints, c(constraint_schema, table_name), tables) %>% - # dm_add_fk(referential_constraints, c(constraint_schema, referenced_table_name), tables) %>% - - dm_add_pk(key_column_usage, c(constraint_catalog, constraint_schema, constraint_name, ordinal_position)) %>% - dm_add_fk(key_column_usage, c(table_catalog, table_schema, table_name, column_name), columns) %>% - dm_add_fk(key_column_usage, c(constraint_catalog, constraint_schema, constraint_name), table_constraints) %>% - # - # not on mariadb; - dm_add_pk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name, ordinal_position)) %>% - dm_add_fk(constraint_column_usage, c(table_catalog, table_schema, table_name, column_name), columns) %>% - dm_add_fk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name), table_constraints) %>% - dm_add_fk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name, ordinal_position), key_column_usage) %>% - # - dm_set_colors(brown = 
c(tables, columns), blue = schemata, green4 = ends_with("_constraints"), orange = ends_with("_usage")) -} - -tbl_lc <- function(con, name) { - out <- tbl(con, name) - names <- colnames(out) - names_lc <- tolower(names) - if (all(names == names_lc)) { - return(out) - } - out %>% rename(!!!set_names(syms(names), names_lc)) -} - -select_dm_meta <- function(dm_meta) { - dm_meta %>% - dm_select(schemata, catalog_name, schema_name) %>% - dm_select(tables, table_catalog, table_schema, table_name, table_type) %>% - dm_select(columns, table_catalog, table_schema, table_name, column_name, ordinal_position, column_default, is_nullable) %>% - dm_select(table_constraints, constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, constraint_type) %>% - dm_select(key_column_usage, constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, column_name, ordinal_position) %>% - dm_select(constraint_column_usage, table_catalog, table_schema, table_name, column_name, constraint_catalog, constraint_schema, constraint_name, ordinal_position) -} - -filter_dm_meta <- function(dm_meta, catalog = NULL, schema = NULL) { - force(catalog) - force(schema) - - schemata <- dm_meta$schemata - tables <- dm_meta$tables - columns <- dm_meta$columns - table_constraints <- dm_meta$table_constraints - key_column_usage <- dm_meta$key_column_usage - constraint_column_usage <- dm_meta$constraint_column_usage - - if (!is.null(catalog) && !is.na(catalog)) { - schemata <- schemata %>% filter(catalog_name %in% !!catalog) - tables <- tables %>% filter(table_catalog %in% !!catalog) - columns <- columns %>% filter(table_catalog %in% !!catalog) - table_constraints <- table_constraints %>% filter(table_catalog %in% !!catalog) - key_column_usage <- key_column_usage %>% filter(table_catalog %in% !!catalog) - constraint_column_usage <- constraint_column_usage %>% filter(table_catalog %in% !!catalog) - } - - if (!is.null(schema)) { - schemata 
<- schemata %>% filter(schema_name %in% !!schema) - tables <- tables %>% filter(table_schema %in% !!schema) - columns <- columns %>% filter(table_schema %in% !!schema) - table_constraints <- table_constraints %>% filter(table_schema %in% !!schema) - key_column_usage <- key_column_usage %>% filter(table_schema %in% !!schema) - constraint_column_usage <- constraint_column_usage %>% filter(table_schema %in% !!schema) - } - - dm( - schemata, - tables, - columns, - table_constraints, - key_column_usage, - constraint_column_usage - ) %>% - dm_meta_add_keys() -} - dm_learn_from_db_legacy <- function(con, dbname, ...) { sql <- db_learn_query(con, dbname = dbname, ...) if (is.null(sql)) { diff --git a/R/meta.R b/R/meta.R new file mode 100644 index 000000000..3676a3074 --- /dev/null +++ b/R/meta.R @@ -0,0 +1,145 @@ +dm_meta <- function(con, catalog = NA, schema = NULL) { + if (is_mssql(con)) { + if (is.null(catalog)) { + # FIXME: Classed error message? + abort("SQL server only supports learning from one database.") + } + + if (!is.na(catalog)) { + message("Temporarily switching to database ", tick(catalog), ".") + old_dbname <- dbGetQuery(con, "SELECT DB_NAME()")[[1]] + sql <- paste0("USE ", dbQuoteIdentifier(con, catalog)) + old_sql <- paste0("USE ", dbQuoteIdentifier(con, old_dbname)) + dbExecute(con, sql, immediate = TRUE) + withr::defer({ + dbExecute(con, old_sql, immediate = TRUE) + }) + } + } + + con %>% + dm_meta_raw(catalog) %>% + select_dm_meta() %>% + filter_dm_meta(catalog, schema) %>% + collect() +} + +dm_meta_raw <- function(con, catalog) { + src <- src_from_src_or_con(con) + + schemata <- tbl_lc(src, dbplyr::ident_q("information_schema.schemata")) + tables <- tbl_lc(src, dbplyr::ident_q("information_schema.tables")) + columns <- tbl_lc(src, dbplyr::ident_q("information_schema.columns")) + table_constraints <- tbl_lc(src, dbplyr::ident_q("information_schema.table_constraints")) + key_column_usage <- tbl_lc(src, 
dbplyr::ident_q("information_schema.key_column_usage")) + + if (is_postgres(src)) { + info_fkc <- + table_constraints %>% + select(constraint_catalog, constraint_schema, constraint_name, constraint_type) %>% + filter(constraint_type == "FOREIGN KEY") + + constraint_column_usage <- + tbl_lc(src, dbplyr::ident_q("information_schema.constraint_column_usage")) %>% + group_by(constraint_catalog, constraint_schema, constraint_name) %>% + mutate(ordinal_position = row_number()) %>% + ungroup() %>% + semi_join(info_fkc, by = c("constraint_catalog", "constraint_schema", "constraint_name")) + + # FIXME: Also has `position_in_unique_constraint`, used elsewhere? + } else if (is_mssql(src)) { + constraint_column_usage <- mssql_constraint_column_usage(src, table_constraints, catalog) + } + + dm(schemata, tables, columns, table_constraints, key_column_usage, constraint_column_usage) %>% + dm_meta_add_keys() +} + +dm_meta_add_keys <- function(dm_meta) { + dm_meta %>% + dm_add_pk(schemata, c(catalog_name, schema_name)) %>% + dm_add_pk(tables, c(table_catalog, table_schema, table_name)) %>% + dm_add_fk(tables, c(table_catalog, table_schema), schemata) %>% + dm_add_pk(columns, c(table_catalog, table_schema, table_name, column_name)) %>% + dm_add_fk(columns, c(table_catalog, table_schema, table_name), tables) %>% + # dm_add_fk(table_constraints, table_schema, schemata) %>% + dm_add_pk(table_constraints, c(constraint_catalog, constraint_schema, constraint_name)) %>% + dm_add_fk(table_constraints, c(table_catalog, table_schema, table_name), tables) %>% + # constraint_schema vs. table_schema? 
+ + # not on mssql: + # dm_add_fk(referential_constraints, c(constraint_schema, table_name), tables) %>% + # dm_add_fk(referential_constraints, c(constraint_schema, referenced_table_name), tables) %>% + + dm_add_pk(key_column_usage, c(constraint_catalog, constraint_schema, constraint_name, ordinal_position)) %>% + dm_add_fk(key_column_usage, c(table_catalog, table_schema, table_name, column_name), columns) %>% + dm_add_fk(key_column_usage, c(constraint_catalog, constraint_schema, constraint_name), table_constraints) %>% + # + # not on mariadb; + dm_add_pk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name, ordinal_position)) %>% + dm_add_fk(constraint_column_usage, c(table_catalog, table_schema, table_name, column_name), columns) %>% + dm_add_fk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name), table_constraints) %>% + dm_add_fk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name, ordinal_position), key_column_usage) %>% + # + dm_set_colors(brown = c(tables, columns), blue = schemata, green4 = ends_with("_constraints"), orange = ends_with("_usage")) +} + +tbl_lc <- function(con, name) { + out <- tbl(con, name) + names <- colnames(out) + names_lc <- tolower(names) + if (all(names == names_lc)) { + return(out) + } + out %>% rename(!!!set_names(syms(names), names_lc)) +} + +select_dm_meta <- function(dm_meta) { + dm_meta %>% + dm_select(schemata, catalog_name, schema_name) %>% + dm_select(tables, table_catalog, table_schema, table_name, table_type) %>% + dm_select(columns, table_catalog, table_schema, table_name, column_name, ordinal_position, column_default, is_nullable) %>% + dm_select(table_constraints, constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, constraint_type) %>% + dm_select(key_column_usage, constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, column_name, 
ordinal_position) %>% + dm_select(constraint_column_usage, table_catalog, table_schema, table_name, column_name, constraint_catalog, constraint_schema, constraint_name, ordinal_position) +} + +filter_dm_meta <- function(dm_meta, catalog = NULL, schema = NULL) { + force(catalog) + force(schema) + + schemata <- dm_meta$schemata + tables <- dm_meta$tables + columns <- dm_meta$columns + table_constraints <- dm_meta$table_constraints + key_column_usage <- dm_meta$key_column_usage + constraint_column_usage <- dm_meta$constraint_column_usage + + if (!is.null(catalog) && !is.na(catalog)) { + schemata <- schemata %>% filter(catalog_name %in% !!catalog) + tables <- tables %>% filter(table_catalog %in% !!catalog) + columns <- columns %>% filter(table_catalog %in% !!catalog) + table_constraints <- table_constraints %>% filter(table_catalog %in% !!catalog) + key_column_usage <- key_column_usage %>% filter(table_catalog %in% !!catalog) + constraint_column_usage <- constraint_column_usage %>% filter(table_catalog %in% !!catalog) + } + + if (!is.null(schema)) { + schemata <- schemata %>% filter(schema_name %in% !!schema) + tables <- tables %>% filter(table_schema %in% !!schema) + columns <- columns %>% filter(table_schema %in% !!schema) + table_constraints <- table_constraints %>% filter(table_schema %in% !!schema) + key_column_usage <- key_column_usage %>% filter(table_schema %in% !!schema) + constraint_column_usage <- constraint_column_usage %>% filter(table_schema %in% !!schema) + } + + dm( + schemata, + tables, + columns, + table_constraints, + key_column_usage, + constraint_column_usage + ) %>% + dm_meta_add_keys() +} From 662f4e53cdbc15527c997cb4c05c950783816ad2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 28 Mar 2022 06:45:27 +0200 Subject: [PATCH 51/63] Reorder --- R/dm-from-src.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/dm-from-src.R b/R/dm-from-src.R index 592961357..9092b7c6d 100644 --- a/R/dm-from-src.R +++ 
b/R/dm-from-src.R @@ -78,12 +78,12 @@ dm_from_src <- function(src = NULL, table_names = NULL, learn_keys = NULL, inform("Keys queried successfully, use `learn_keys = TRUE` to mute this message.") } - tbls_in_dm <- src_tbls_impl(dm_learned) - if (is_null(table_names)) { return(dm_learned) } + tbls_in_dm <- src_tbls_impl(dm_learned) + if (!all(table_names %in% tbls_in_dm)) { abort_tbl_access(setdiff(table_names, tbls_in_dm)) } From ab2aada040c53ea9030fbc676a0f695ffb3cb022 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 28 Mar 2022 06:45:46 +0200 Subject: [PATCH 52/63] Use unclassed type for table name --- R/learn.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/learn.R b/R/learn.R index 9b0bbec0a..015f65622 100644 --- a/R/learn.R +++ b/R/learn.R @@ -59,8 +59,9 @@ dm_learn_from_db_meta <- function(con, catalog = NULL, schema = NULL, name_forma dm_name <- df_info$tables %>% select(catalog = table_catalog, schema = table_schema, table = table_name) %>% - mutate(name = glue(name_format)) %>% + mutate(name = glue(!!name_format)) %>% pull() %>% + unclass() %>% vec_as_names(repair = "unique") from <- From f1ad92122f3e409cc196f6c533a3daf4da56a1aa Mon Sep 17 00:00:00 2001 From: krlmlr Date: Mon, 28 Mar 2022 04:55:49 +0000 Subject: [PATCH 53/63] Auto-update from GitHub Actions Run: https://github.com/cynkra/dm/actions/runs/2050203171 --- R/check-cardinalities.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/R/check-cardinalities.R b/R/check-cardinalities.R index 1f6eddd93..6f69e18ad 100644 --- a/R/check-cardinalities.R +++ b/R/check-cardinalities.R @@ -206,11 +206,9 @@ examine_cardinality_impl <- function(parent_table, parent_key_cols, child_table, if (min_1 && max_1) { return("bijective mapping (child: 1 -> parent: 1)") - } else - if (min_1) { + } else if (min_1) { return("surjective mapping (child: 1 to n -> parent: 1)") - } else - if (max_1) { + } else if (max_1) { return("injective mapping (child: 0 
or 1 -> parent: 1)") } "generic mapping (child: 0 to n -> parent: 1)" From 499dbe826b4310142478762d70b32b1179aa17e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 30 Apr 2022 00:20:44 +0200 Subject: [PATCH 54/63] vars argument --- R/mssql.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/mssql.R b/R/mssql.R index 4e2d3f04c..50bd8f312 100644 --- a/R/mssql.R +++ b/R/mssql.R @@ -1,4 +1,4 @@ -mssql_sys_db <- function(con, dbname, name) { +mssql_sys_db <- function(con, dbname, name, vars = NULL) { if (is.na(dbname)) { fq_name <- name sql_name <- sql("DB_NAME()") @@ -6,14 +6,14 @@ mssql_sys_db <- function(con, dbname, name) { fq_name <- paste0(dbname, ".", name) sql_name <- dbname } - tbl(con, dbplyr::ident_q(fq_name)) %>% + tbl(con, dbplyr::ident_q(fq_name), vars = vars) %>% mutate(catalog = !!sql_name) %>% select(catalog, everything()) } -mssql_sys_all_db <- function(con, dbname, name, warn = FALSE) { +mssql_sys_all_db <- function(con, dbname, name, warn = FALSE, vars = NULL) { lazy <- map(dbname, ~ tryCatch( - mssql_sys_db(con, .x, name), + mssql_sys_db(con, .x, name, vars), error = function(e) { if (warn) { warn(paste0("Can't access database ", .x, ": ", conditionMessage(e))) From b21291433f3f34eb4e121cf4777e09eb26e88c9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 30 Apr 2022 00:16:10 +0200 Subject: [PATCH 55/63] Test: single db with vars --- R/mssql.R | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/R/mssql.R b/R/mssql.R index 50bd8f312..189626660 100644 --- a/R/mssql.R +++ b/R/mssql.R @@ -30,19 +30,33 @@ mssql_constraint_column_usage <- function(con, table_constraints, dbname) { select(constraint_catalog, constraint_schema, constraint_name, constraint_type) %>% filter(constraint_type == "FOREIGN KEY") - fkc <- - mssql_sys_all_db(con, dbname, "sys.foreign_key_columns", warn = TRUE) + fkc <- mssql_sys_db(con, dbname, 
"sys.foreign_key_columns", vars = c( + "constraint_object_id", "constraint_column_id", + "referenced_object_id", "referenced_column_id" + )) + columns <- - mssql_sys_all_db(con, dbname, "sys.columns") %>% - select(catalog = catalog, column_name = name, object_id, column_id) + mssql_sys_db(con, dbname, "sys.columns", vars = c( + "name", "object_id", "column_id" + )) %>% + rename(column_name = name) + tables <- - mssql_sys_all_db(con, dbname, "sys.tables") %>% - select(catalog = catalog, schema_id, table_name = name, object_id) + mssql_sys_db(con, dbname, "sys.tables", vars = c( + "schema_id", "name", "object_id" + )) %>% + rename(table_name = name) + schemas <- - mssql_sys_all_db(con, dbname, "sys.schemas") %>% - select(catalog = catalog, schema_id, table_schema = name) + mssql_sys_db(con, dbname, "sys.schemas", vars = c( + "schema_id", "name" + )) %>% + rename(table_schema = name) + objects <- - mssql_sys_all_db(con, dbname, "sys.objects") %>% + mssql_sys_db(con, dbname, "sys.objects", vars = c( + "name", "object_id" + )) %>% select(constraint_name = name, object_id) sys_fkc_column_usage <- From d89da8ecea283a60727bf5fcd1bcf02b3b29c356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 30 Apr 2022 00:19:51 +0200 Subject: [PATCH 56/63] Explicit variable names --- R/meta.R | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/R/meta.R b/R/meta.R index 3676a3074..2bc1b180b 100644 --- a/R/meta.R +++ b/R/meta.R @@ -27,11 +27,34 @@ dm_meta <- function(con, catalog = NA, schema = NULL) { dm_meta_raw <- function(con, catalog) { src <- src_from_src_or_con(con) - schemata <- tbl_lc(src, dbplyr::ident_q("information_schema.schemata")) - tables <- tbl_lc(src, dbplyr::ident_q("information_schema.tables")) - columns <- tbl_lc(src, dbplyr::ident_q("information_schema.columns")) - table_constraints <- tbl_lc(src, dbplyr::ident_q("information_schema.table_constraints")) - key_column_usage <- tbl_lc(src, 
dbplyr::ident_q("information_schema.key_column_usage")) + local_options(digits.secs = 6) + + schemata <- tbl(src, dbplyr::ident_q("information_schema.schemata"), vars = c( + "catalog_name", "schema_name", "schema_owner", "default_character_set_catalog", + "default_character_set_schema", "default_character_set_name" + )) + tables <- tbl(src, dbplyr::ident_q("information_schema.tables"), vars = c( + "table_catalog", "table_schema", "table_name", "table_type" + )) + columns <- tbl(src, dbplyr::ident_q("information_schema.columns"), vars = c( + "table_catalog", "table_schema", "table_name", "column_name", + "ordinal_position", "column_default", "is_nullable", "data_type", + "character_maximum_length", "character_octet_length", "numeric_precision", + "numeric_precision_radix", "numeric_scale", "datetime_precision", + "character_set_catalog", "character_set_schema", "character_set_name", + "collation_catalog", "collation_schema", "collation_name", "domain_catalog", + "domain_schema", "domain_name" + )) + table_constraints <- tbl(src, dbplyr::ident_q("information_schema.table_constraints"), vars = c( + "constraint_catalog", "constraint_schema", "constraint_name", + "table_catalog", "table_schema", "table_name", "constraint_type", + "is_deferrable", "initially_deferred" + )) + key_column_usage <- tbl(src, dbplyr::ident_q("information_schema.key_column_usage"), vars = c( + "constraint_catalog", "constraint_schema", "constraint_name", + "table_catalog", "table_schema", "table_name", "column_name", + "ordinal_position" + )) if (is_postgres(src)) { info_fkc <- From 5563eba94c555934d36dc2dc30545d887c8ae8f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 30 Apr 2022 00:50:57 +0200 Subject: [PATCH 57/63] Remove mssql_sys_all_db() --- R/mssql.R | 13 ------------- R/zzz.R | 1 - 2 files changed, 14 deletions(-) diff --git a/R/mssql.R b/R/mssql.R index 189626660..175ce1e6d 100644 --- a/R/mssql.R +++ b/R/mssql.R @@ -11,19 +11,6 @@ mssql_sys_db <- 
function(con, dbname, name, vars = NULL) { select(catalog, everything()) } -mssql_sys_all_db <- function(con, dbname, name, warn = FALSE, vars = NULL) { - lazy <- map(dbname, ~ tryCatch( - mssql_sys_db(con, .x, name, vars), - error = function(e) { - if (warn) { - warn(paste0("Can't access database ", .x, ": ", conditionMessage(e))) - } - NULL - } - )) - reduce(compact(lazy), union_all) -} - mssql_constraint_column_usage <- function(con, table_constraints, dbname) { info_fkc <- table_constraints %>% diff --git a/R/zzz.R b/R/zzz.R index d14c852dd..87d3cfced 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -20,7 +20,6 @@ check_version_on_load("RSQLite", "2.2.8", "to use the {.code returning} argument in {.code dm::rows_*()}") # rigg(enum_pk_candidates_impl) - # rigg(mssql_sys_all_db) # rigg(build_copy_data) # rigg(dm_insert_zoomed_outgoing_fks) # rigg(dm_upgrade) From 5dc009efd2c6202040a0455725e4ed9799a0ec29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 30 Apr 2022 08:31:24 +0200 Subject: [PATCH 58/63] Later --- R/meta.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/meta.R b/R/meta.R index 2bc1b180b..3e43bc807 100644 --- a/R/meta.R +++ b/R/meta.R @@ -20,8 +20,7 @@ dm_meta <- function(con, catalog = NA, schema = NULL) { con %>% dm_meta_raw(catalog) %>% select_dm_meta() %>% - filter_dm_meta(catalog, schema) %>% - collect() + filter_dm_meta(catalog, schema) } dm_meta_raw <- function(con, catalog) { From d9845b8eb1b9483eb52934bf905d4b4475750ff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 28 May 2022 06:47:13 +0200 Subject: [PATCH 59/63] Revert "Explicit variable names" This reverts commit d89da8ecea283a60727bf5fcd1bcf02b3b29c356. 
--- R/meta.R | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/R/meta.R b/R/meta.R index 3e43bc807..a689b5671 100644 --- a/R/meta.R +++ b/R/meta.R @@ -26,34 +26,11 @@ dm_meta <- function(con, catalog = NA, schema = NULL) { dm_meta_raw <- function(con, catalog) { src <- src_from_src_or_con(con) - local_options(digits.secs = 6) - - schemata <- tbl(src, dbplyr::ident_q("information_schema.schemata"), vars = c( - "catalog_name", "schema_name", "schema_owner", "default_character_set_catalog", - "default_character_set_schema", "default_character_set_name" - )) - tables <- tbl(src, dbplyr::ident_q("information_schema.tables"), vars = c( - "table_catalog", "table_schema", "table_name", "table_type" - )) - columns <- tbl(src, dbplyr::ident_q("information_schema.columns"), vars = c( - "table_catalog", "table_schema", "table_name", "column_name", - "ordinal_position", "column_default", "is_nullable", "data_type", - "character_maximum_length", "character_octet_length", "numeric_precision", - "numeric_precision_radix", "numeric_scale", "datetime_precision", - "character_set_catalog", "character_set_schema", "character_set_name", - "collation_catalog", "collation_schema", "collation_name", "domain_catalog", - "domain_schema", "domain_name" - )) - table_constraints <- tbl(src, dbplyr::ident_q("information_schema.table_constraints"), vars = c( - "constraint_catalog", "constraint_schema", "constraint_name", - "table_catalog", "table_schema", "table_name", "constraint_type", - "is_deferrable", "initially_deferred" - )) - key_column_usage <- tbl(src, dbplyr::ident_q("information_schema.key_column_usage"), vars = c( - "constraint_catalog", "constraint_schema", "constraint_name", - "table_catalog", "table_schema", "table_name", "column_name", - "ordinal_position" - )) + schemata <- tbl_lc(src, dbplyr::ident_q("information_schema.schemata")) + tables <- tbl_lc(src, dbplyr::ident_q("information_schema.tables")) + columns <- tbl_lc(src, 
dbplyr::ident_q("information_schema.columns")) + table_constraints <- tbl_lc(src, dbplyr::ident_q("information_schema.table_constraints")) + key_column_usage <- tbl_lc(src, dbplyr::ident_q("information_schema.key_column_usage")) if (is_postgres(src)) { info_fkc <- From a6ec1f68ba92fb35b083ec83ac641ed04ccac1f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 28 May 2022 06:48:14 +0200 Subject: [PATCH 60/63] Always collect on SQL Server --- R/meta.R | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/R/meta.R b/R/meta.R index a689b5671..8c3931ad1 100644 --- a/R/meta.R +++ b/R/meta.R @@ -1,4 +1,6 @@ dm_meta <- function(con, catalog = NA, schema = NULL) { + need_collect <- FALSE + if (is_mssql(con)) { if (is.null(catalog)) { # FIXME: Classed error message? @@ -14,13 +16,23 @@ dm_meta <- function(con, catalog = NA, schema = NULL) { withr::defer({ dbExecute(con, old_sql, immediate = TRUE) }) + need_collect <- TRUE } } - con %>% + out <- + con %>% dm_meta_raw(catalog) %>% select_dm_meta() %>% filter_dm_meta(catalog, schema) + + if (need_collect) { + out <- + out %>% + collect() + } + + out } dm_meta_raw <- function(con, catalog) { From 9cccf866d543d16da7583e12330ae611865697a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 28 May 2022 06:49:32 +0200 Subject: [PATCH 61/63] Revert "Revert "Explicit variable names"" This reverts commit d9845b8eb1b9483eb52934bf905d4b4475750ff7. 
--- R/meta.R | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/R/meta.R b/R/meta.R index 8c3931ad1..28c5785e2 100644 --- a/R/meta.R +++ b/R/meta.R @@ -38,11 +38,34 @@ dm_meta <- function(con, catalog = NA, schema = NULL) { dm_meta_raw <- function(con, catalog) { src <- src_from_src_or_con(con) - schemata <- tbl_lc(src, dbplyr::ident_q("information_schema.schemata")) - tables <- tbl_lc(src, dbplyr::ident_q("information_schema.tables")) - columns <- tbl_lc(src, dbplyr::ident_q("information_schema.columns")) - table_constraints <- tbl_lc(src, dbplyr::ident_q("information_schema.table_constraints")) - key_column_usage <- tbl_lc(src, dbplyr::ident_q("information_schema.key_column_usage")) + local_options(digits.secs = 6) + + schemata <- tbl(src, dbplyr::ident_q("information_schema.schemata"), vars = c( + "catalog_name", "schema_name", "schema_owner", "default_character_set_catalog", + "default_character_set_schema", "default_character_set_name" + )) + tables <- tbl(src, dbplyr::ident_q("information_schema.tables"), vars = c( + "table_catalog", "table_schema", "table_name", "table_type" + )) + columns <- tbl(src, dbplyr::ident_q("information_schema.columns"), vars = c( + "table_catalog", "table_schema", "table_name", "column_name", + "ordinal_position", "column_default", "is_nullable", "data_type", + "character_maximum_length", "character_octet_length", "numeric_precision", + "numeric_precision_radix", "numeric_scale", "datetime_precision", + "character_set_catalog", "character_set_schema", "character_set_name", + "collation_catalog", "collation_schema", "collation_name", "domain_catalog", + "domain_schema", "domain_name" + )) + table_constraints <- tbl(src, dbplyr::ident_q("information_schema.table_constraints"), vars = c( + "constraint_catalog", "constraint_schema", "constraint_name", + "table_catalog", "table_schema", "table_name", "constraint_type", + "is_deferrable", "initially_deferred" + )) + key_column_usage <- 
tbl(src, dbplyr::ident_q("information_schema.key_column_usage"), vars = c( + "constraint_catalog", "constraint_schema", "constraint_name", + "table_catalog", "table_schema", "table_name", "column_name", + "ordinal_position" + )) if (is_postgres(src)) { info_fkc <- From 51193308b9898e26b41c7e1295c11739b6a167b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 28 May 2022 06:50:30 +0200 Subject: [PATCH 62/63] Explicit SELECT --- R/meta.R | 26 +++++++++++++------------- R/mssql.R | 5 ++++- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/R/meta.R b/R/meta.R index 28c5785e2..3826c9451 100644 --- a/R/meta.R +++ b/R/meta.R @@ -40,14 +40,14 @@ dm_meta_raw <- function(con, catalog) { local_options(digits.secs = 6) - schemata <- tbl(src, dbplyr::ident_q("information_schema.schemata"), vars = c( + schemata <- tbl_lc(src, "information_schema.schemata", vars = c( "catalog_name", "schema_name", "schema_owner", "default_character_set_catalog", "default_character_set_schema", "default_character_set_name" )) - tables <- tbl(src, dbplyr::ident_q("information_schema.tables"), vars = c( + tables <- tbl_lc(src, "information_schema.tables", vars = c( "table_catalog", "table_schema", "table_name", "table_type" )) - columns <- tbl(src, dbplyr::ident_q("information_schema.columns"), vars = c( + columns <- tbl_lc(src, "information_schema.columns", vars = c( "table_catalog", "table_schema", "table_name", "column_name", "ordinal_position", "column_default", "is_nullable", "data_type", "character_maximum_length", "character_octet_length", "numeric_precision", @@ -56,12 +56,12 @@ dm_meta_raw <- function(con, catalog) { "collation_catalog", "collation_schema", "collation_name", "domain_catalog", "domain_schema", "domain_name" )) - table_constraints <- tbl(src, dbplyr::ident_q("information_schema.table_constraints"), vars = c( + table_constraints <- tbl_lc(src, "information_schema.table_constraints", vars = c( "constraint_catalog", "constraint_schema", 
"constraint_name", "table_catalog", "table_schema", "table_name", "constraint_type", "is_deferrable", "initially_deferred" )) - key_column_usage <- tbl(src, dbplyr::ident_q("information_schema.key_column_usage"), vars = c( + key_column_usage <- tbl_lc(src, "information_schema.key_column_usage", vars = c( "constraint_catalog", "constraint_schema", "constraint_name", "table_catalog", "table_schema", "table_name", "column_name", "ordinal_position" @@ -118,14 +118,14 @@ dm_meta_add_keys <- function(dm_meta) { dm_set_colors(brown = c(tables, columns), blue = schemata, green4 = ends_with("_constraints"), orange = ends_with("_usage")) } -tbl_lc <- function(con, name) { - out <- tbl(con, name) - names <- colnames(out) - names_lc <- tolower(names) - if (all(names == names_lc)) { - return(out) - } - out %>% rename(!!!set_names(syms(names), names_lc)) +tbl_lc <- function(con, name, vars) { + from <- paste0( + "SELECT ", + paste0(DBI::dbQuoteIdentifier(con_from_src_or_con(con), vars), collapse = ", "), + "\nFROM ", name + ) + + tbl(con, sql(from), vars = vars) } select_dm_meta <- function(dm_meta) { diff --git a/R/mssql.R b/R/mssql.R index 175ce1e6d..8647450a2 100644 --- a/R/mssql.R +++ b/R/mssql.R @@ -55,7 +55,10 @@ mssql_constraint_column_usage <- function(con, table_constraints, dbname) { # table_schema is used twice transmute(constraint_catalog = catalog, constraint_schema = table_schema, constraint_name, table_schema, table_name, column_name, ordinal_position = constraint_column_id) - tbl_lc(con, dbplyr::ident_q("information_schema.constraint_column_usage")) %>% + tbl_lc(con, "information_schema.constraint_column_usage", vars = c( + "table_catalog", "table_schema", "table_name", "column_name", + "constraint_catalog", "constraint_schema", "constraint_name" + )) %>% semi_join(info_fkc, by = c("constraint_catalog", "constraint_schema", "constraint_name")) %>% select(-table_schema, -table_name, -column_name) %>% distinct() %>% From 424701bf90adcbe0f2aa4569f221fa022facbd46 Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 28 May 2022 07:06:43 +0200 Subject: [PATCH 63/63] Add snapshot test --- tests/testthat/_snaps/learn.md | 40 ++++++++++++++++++++++++++++++++++ tests/testthat/test-learn.R | 9 ++++++-- 2 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 tests/testthat/_snaps/learn.md diff --git a/tests/testthat/_snaps/learn.md b/tests/testthat/_snaps/learn.md new file mode 100644 index 000000000..065e3c5b2 --- /dev/null +++ b/tests/testthat/_snaps/learn.md @@ -0,0 +1,40 @@ +# dm_meta() data model + + Code + dm_meta(my_test_src()) %>% dm_paste(options = c("select", "keys", "color")) + Message + dm::dm( + schemata, + tables, + columns, + table_constraints, + key_column_usage, + constraint_column_usage, + ) %>% + dm::dm_select(schemata, catalog_name, schema_name) %>% + dm::dm_select(tables, table_catalog, table_schema, table_name, table_type) %>% + dm::dm_select(columns, table_catalog, table_schema, table_name, column_name, ordinal_position, column_default, is_nullable) %>% + dm::dm_select(table_constraints, constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, constraint_type) %>% + dm::dm_select(key_column_usage, constraint_catalog, constraint_schema, constraint_name, table_catalog, table_schema, table_name, column_name, ordinal_position) %>% + dm::dm_select(constraint_column_usage, table_catalog, table_schema, table_name, column_name, constraint_catalog, constraint_schema, constraint_name, ordinal_position) %>% + dm::dm_add_pk(schemata, c(catalog_name, schema_name)) %>% + dm::dm_add_pk(tables, c(table_catalog, table_schema, table_name)) %>% + dm::dm_add_pk(columns, c(table_catalog, table_schema, table_name, column_name)) %>% + dm::dm_add_pk(table_constraints, c(constraint_catalog, constraint_schema, constraint_name)) %>% + dm::dm_add_pk(key_column_usage, c(constraint_catalog, constraint_schema, constraint_name, ordinal_position)) %>% + 
dm::dm_add_pk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name, ordinal_position)) %>% + dm::dm_add_fk(tables, c(table_catalog, table_schema), schemata) %>% + dm::dm_add_fk(columns, c(table_catalog, table_schema, table_name), tables) %>% + dm::dm_add_fk(table_constraints, c(table_catalog, table_schema, table_name), tables) %>% + dm::dm_add_fk(key_column_usage, c(table_catalog, table_schema, table_name, column_name), columns) %>% + dm::dm_add_fk(constraint_column_usage, c(table_catalog, table_schema, table_name, column_name), columns) %>% + dm::dm_add_fk(key_column_usage, c(constraint_catalog, constraint_schema, constraint_name), table_constraints) %>% + dm::dm_add_fk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name), table_constraints) %>% + dm::dm_add_fk(constraint_column_usage, c(constraint_catalog, constraint_schema, constraint_name, ordinal_position), key_column_usage) %>% + dm::dm_set_colors(`#0000FFFF` = schemata) %>% + dm::dm_set_colors(`#A52A2AFF` = tables) %>% + dm::dm_set_colors(`#A52A2AFF` = columns) %>% + dm::dm_set_colors(`#008B00FF` = table_constraints) %>% + dm::dm_set_colors(`#FFA500FF` = key_column_usage) %>% + dm::dm_set_colors(`#FFA500FF` = constraint_column_usage) + diff --git a/tests/testthat/test-learn.R b/tests/testthat/test-learn.R index 0a5a4b146..1a617e559 100644 --- a/tests/testthat/test-learn.R +++ b/tests/testthat/test-learn.R @@ -372,6 +372,11 @@ test_that("Learning from a specific schema in another DB for MSSQL works?", { ) }) -# tests for compound keys ------------------------------------------------- +test_that("dm_meta() data model", { + skip_if_src_not("mssql") -# test is already done in test-dm-from-src.R + expect_snapshot({ + dm_meta(my_test_src()) %>% + dm_paste(options = c("select", "keys", "color")) + }) +})