From 65e3e3d8a835140e1ed1dc0506bcbdbf6393439f Mon Sep 17 00:00:00 2001 From: Alex Steel <130377221+asteel-gsa@users.noreply.github.com> Date: Tue, 11 Jun 2024 12:06:19 -0400 Subject: [PATCH 01/10] Rough pass at checking if a table exists in the db --- cmd/check_db.go | 162 ++++++++++++++++++++++++++++++++++++++++++++++++ cmd/root.go | 4 ++ db_tables.txt | 134 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 300 insertions(+) create mode 100644 cmd/check_db.go create mode 100644 db_tables.txt diff --git a/cmd/check_db.go b/cmd/check_db.go new file mode 100644 index 0000000..64330b1 --- /dev/null +++ b/cmd/check_db.go @@ -0,0 +1,162 @@ +/* +Copyright © 2024 NAME HERE +*/ +package cmd + +import ( + "bufio" + "database/sql" + "fmt" + "log" + "os" + "strings" + + "github.com/spf13/cobra" + "gov.gsa.fac.cgov-util/internal/logging" + "gov.gsa.fac.cgov-util/internal/vcap" +) + +var ( + source_database string +) + +func check_if_table_exists(source_creds vcap.Credentials) { + //SELECT EXISTS (SELECT FROM pg_tables WHERE schemaname = 'schema_name' AND tablename = 'table_name'); + db, err := sql.Open("postgres", source_creds.Get("uri").String()) + if err != nil { + logging.Logger.Println("TABLECHECK could not connect to DB for checking table existance") + logging.Logger.Printf("DBTOS3 %s\n", err) + os.Exit(logging.DB_SCHEMA_SCAN_FAILURE) + } + + file, err := os.Open("db_tables.txt") + if err != nil { + log.Fatal(err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + var not_existing []string + for scanner.Scan() { + //scanner.Text() + query := fmt.Sprintf("select * from %s ;", scanner.Text()) + //query := fmt.Sprintf("SELECT EXISTS (SELECT FROM pg_tables WHERE schemaname = 'public' AND tablename = '%s')", scanner.Text()) + rows, table_check := db.Query(query) + + if table_check == nil { + //fmt.Println(i + " exists") + rows.Next() + } else { + //logging.Logger.Println(scanner.Text() + " does not exist") + // store all scanner.Text() into a map + // if map != nil + // hard exit + not_existing = append(not_existing, scanner.Text()) + } + } + if len(not_existing) > 0 { + logging.Logger.Println("An array of tables that does not exist in the database, but does exist in a manifest has been returned.") + logging.Logger.Println("System exiting...") + joined_tables := strings.Join(not_existing[:], " ") + logging.Logger.Printf(joined_tables) + os.Exit(3) + } else { + logging.Logger.Printf("Manifest and Database tables appear to be in sync.") + } + + if err := scanner.Err(); err != nil { + log.Fatal(err) + } + + //for table := range table_to_schema { + // for table := range list_of_tables { + // //"SELECT schemaname, tablename FROM pg_tables WHERE schemaname = 'public'" + // query := fmt.Sprintf("SELECT EXISTS (SELECT FROM pg_tables WHERE schemaname = 'public' AND tablename = '%s')", table) + // rows, table_check := db.Query(query) + + // if table_check == nil { + // fmt.Printf(table + " exists") + // rows.Close() + // } else { + // fmt.Println(table + " does not exist") + // } + // } + // exists, err := db.Query(query) + // if err != nil { + // logging.Logger.Println("DBTOS3 could not get table names to check if it exists.") + // logging.Logger.Printf("DBTOS3 %s\n", err) + // os.Exit(logging.DB_SCHEMA_SCAN_FAILURE) + // } +} + +// https://stackoverflow.com/a/18479916 +// func readLines(path string) string { +// // file, err := os.Open(path) +// // if err != nil { +// // return nil, err +// // } +// // defer file.Close() + +// // var list_of_tables []string +// // scanner := bufio.NewScanner(file) +// // for scanner.Scan() { +// // list_of_tables = append(list_of_tables, scanner.Text()) +// // } +// // return list_of_tables, scanner.Err() + +// file, err := os.Open(path) +// if err != nil { +// log.Fatal(err) +// } +// defer file.Close() + +// scanner := bufio.NewScanner(file) +// for scanner.Scan() { +// I := scanner.Text() +// } + +// if err := scanner.Err(); err != nil { +// log.Fatal(err) +// } + +// } + +// https://stackoverflow.com/a/15323988 +// func stringInSlice(table string, list_of_tables []string) bool { +// logging.Logger.Println(reflect.DeepEqual(table_to_schema, list_of_tables)) +// eq := reflect.DeepEqual(table, list_of_tables) +// if eq { +// logging.Logger.Println("Database and Manifest appear to be in sync.") +// return true +// } else { +// logging.Logger.Println("Database and Manifest appear to differ.") +// return false +// } +// for _, i := range list_of_tables { +// if table == i { +// //logging.Logger.Printf(table + " exists in manifest and database.") +// //logging.Logger.Printf("table: " + table + " appears to be missing.") +// return true +// } +// } +// logging.Logger.Printf("table: " + table + " appears to be missing.") +// return false +// } + +// checkDbCmd represents the checkDb command +var checkDbCmd = &cobra.Command{ + Use: "check_db", + Short: "A brief description of your command", + Long: `A`, + Run: func(cmd *cobra.Command, args []string) { + db_creds := getDBCredentials(source_database) + //stringInSlice(table, list_of_tables) + check_if_table_exists(db_creds) + + }, +} + +func init() { + rootCmd.AddCommand(checkDbCmd) + parseFlags("check_tables", checkDbCmd) +} diff --git a/cmd/root.go b/cmd/root.go index 5faf79c..425ddbe 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -57,6 +57,10 @@ func parseFlags(cmd_name string, cmd *cobra.Command) { fmt.Println("RUNNING TRUNCATE FLAGS") cmd.Flags().StringVarP(&truncate_db, "db", "", "", "target database name") cmd.Flags().StringSliceVarP(&truncate_truncate, "truncate", "", []string{}, "tables to truncate before load") + case "check_tables": + fmt.Println("RUNNING CHECK_TABLES FLAGS") + cmd.Flags().StringVarP(&source_database, "db", "", "", "database name") + cmd.MarkFlagRequired("source_database") default: fmt.Printf("NO FLAGS PROCESSED") } diff --git a/db_tables.txt b/db_tables.txt new file mode 100644 index 0000000..bed8911 --- /dev/null +++ b/db_tables.txt @@ -0,0 +1,134 @@ +census_duns21 +census_eins18 +census_cfda19 +census_duns18 +census_eins16 +census_cpas21 +census_cpas17 +census_agency16 +census_captext19 +census_captext_formatted21 +census_cfda20 +census_captext20 +census_agency17 +census_captext21 +dissemination_note +census_cfda16 +census_duns19 +census_cpas18 +census_cpas22 +census_agency19 +census_eins17 +census_duns22 +census_cfda21 +census_captext_formatted19 +census_cpas19 +census_duns16 +census_agency20 +census_cfda22 +census_cfda17 +census_notes20 +census_duns17 +census_cfda18 +census_cpas20 +census_duns20 +census_captext_formatted20 +census_agency22 +census_cpas16 +census_findings18 +census_eins19 +census_findings16 +census_findings20 +census_findings22 +census_findings21 +census_eins21 +census_eins22 +census_eins20 +census_findings19 +census_findings17 +census_findingstext_formatted22 +census_findingstext21 +census_gen16 +census_gen18 +census_findingstext_formatted19 +census_gen17 +census_findingstext_formatted20 +census_findingstext_formatted21 +census_findingstext19 +census_findingstext20 +census_gen22 +census_notes21 +census_notes19 +census_passthrough16 +census_passthrough18 +census_gen19 +census_gen21 +census_gen20 +census_passthrough17 +census_notes22 +census_passthrough22 +census_revisions22 +census_ueis22 +census_revisions19 +census_revisions21 +census_revisions20 +census_passthrough20 +census_passthrough19 +census_passthrough21 +audit_deletedaccess +census_findingstext22 +auth_group_permissions +django_content_type +auth_permission +django_migrations +auth_group +auth_user +auth_user_user_permissions +django_admin_log +auth_user_groups +audit_submissionevent +authtoken_token +audit_singleauditreportfile +census_historical_migration_elecauditfindings +census_historical_migration_eleceins +census_historical_migration_elecfindingstext +census_historical_migration_eleccpas +census_historical_migration_elecnotes +census_historical_migration_elecauditheader +census_historical_migration_elecaudits +census_historical_migration_eleccaptext +dissemination_additionalein +census_historical_migration_reportmigrationstatus +census_historical_migration_migrationerrordetail +census_historical_migration_elecueis +census_historical_migration_elecpassthrough +dissemination_finding +support_administrative_key_uuids +dissemination_general +dissemination_migrationinspectionrecord +django_session +dissemination_issuedescriptionrecord +dissemination_invalidauditrecord +users_permission +support_adminapievent +users_staffuserlog +users_userprofile +support_cognizantassignment +users_staffuser +support_cognizantbaseline +dissemination_tribalapiaccesskeyids +dissemination_findingtext +dissemination_additionaluei +dissemination_captext +users_userpermission +dissemination_onetimeaccess +dissemination_federalaward +dissemination_passthrough +audit_singleauditchecklist +census_captext22 +census_captext_formatted22 +census_agency21 +dissemination_secondaryauditor +census_agency18 +audit_excelfile +audit_access From 2f04a3832a85431defcab2d1221e27f7d8429d88 Mon Sep 17 00:00:00 2001 From: Alex Steel <130377221+asteel-gsa@users.noreply.github.com> Date: Tue, 11 Jun 2024 12:16:20 -0400 Subject: [PATCH 02/10] Add error to logging call --- cmd/check_db.go | 6 +++--- internal/logging/logging.go | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cmd/check_db.go b/cmd/check_db.go index 64330b1..f871249 100644 --- a/cmd/check_db.go +++ b/cmd/check_db.go @@ -55,10 +55,10 @@ func check_if_table_exists(source_creds vcap.Credentials) { } } if len(not_existing) > 0 { - logging.Logger.Println("An array of tables that does not exist in the database, but does exist in a manifest has been returned.") - logging.Logger.Println("System exiting...") + logging.Error.Println("An array of tables that does not exist in the database, but does exist in a manifest has been returned.") + logging.Error.Println("System exiting...") joined_tables := strings.Join(not_existing[:], " ") - logging.Logger.Printf(joined_tables) + logging.Error.Printf(joined_tables) os.Exit(3) } else { logging.Logger.Printf("Manifest and Database tables appear to be in sync.") diff --git a/internal/logging/logging.go b/internal/logging/logging.go index 5fb502c..3de1de5 100644 --- a/internal/logging/logging.go +++ b/internal/logging/logging.go @@ -6,3 +6,4 @@ import ( ) var Logger = log.New(os.Stdout, "INFO: ", log.Ldate|log.Ltime) +var Error = log.New(os.Stdout, "ERROR: ", log.Ldate|log.Ltime) From a5015157eede952bca09a27a7a2b86b3a9ce8839 Mon Sep 17 00:00:00 2001 From: Alex Steel <130377221+asteel-gsa@users.noreply.github.com> Date: Fri, 14 Jun 2024 11:53:59 -0400 Subject: [PATCH 03/10] Limit rows returned --- cmd/check_db.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/check_db.go b/cmd/check_db.go index f871249..caa4329 100644 --- a/cmd/check_db.go +++ b/cmd/check_db.go @@ -39,7 +39,7 @@ func check_if_table_exists(source_creds vcap.Credentials) { var not_existing []string for scanner.Scan() { //scanner.Text() - query := fmt.Sprintf("select * from %s ;", scanner.Text()) + query := fmt.Sprintf("select * from %s LIMIT 1;", scanner.Text()) //query := fmt.Sprintf("SELECT EXISTS (SELECT FROM pg_tables WHERE schemaname = 'public' AND tablename = '%s')", scanner.Text()) rows, table_check := db.Query(query) From 00e745ec56922fda33ce393313096e09b406b21a Mon Sep 17 00:00:00 2001 From: Alex Steel <130377221+asteel-gsa@users.noreply.github.com> Date: Fri, 14 Jun 2024 11:54:31 -0400 Subject: [PATCH 04/10] Add new error code --- cmd/check_db.go | 1 + internal/logging/error_codes.go | 2 ++ 2 files changed, 3 insertions(+) diff --git a/cmd/check_db.go b/cmd/check_db.go index caa4329..7dfdfb6 100644 --- a/cmd/check_db.go +++ b/cmd/check_db.go @@ -60,6 +60,7 @@ func check_if_table_exists(source_creds vcap.Credentials) { joined_tables := strings.Join(not_existing[:], " ") logging.Error.Printf(joined_tables) os.Exit(3) + os.Exit(logging.DB_MISSING_TABLES) } else { logging.Logger.Printf("Manifest and Database tables appear to be in sync.") } diff --git a/internal/logging/error_codes.go b/internal/logging/error_codes.go index 4e08914..8fbd992 100644 --- a/internal/logging/error_codes.go +++ b/internal/logging/error_codes.go @@ -8,3 +8,5 @@ const PIPE_FAILURE = -30 const DB_SCHEMA_SCAN_FAILURE = -40 const DB_TRUNCATE_ERROR = -41 + +const DB_MISSING_TABLES = -50 From a29366d8f299e804570ae8818e63c4ce56676f58 Mon Sep 17 00:00:00 2001 From: Alex Steel <130377221+asteel-gsa@users.noreply.github.com> Date: Fri, 14 Jun 2024 11:54:43 -0400 Subject: [PATCH 05/10] Code cleanup --- cmd/check_db.go | 79 ++----------------------------------------------- 1 file changed, 2 insertions(+), 77 deletions(-) diff --git a/cmd/check_db.go b/cmd/check_db.go index 7dfdfb6..96e72be 100644 --- a/cmd/check_db.go +++ b/cmd/check_db.go @@ -55,11 +55,10 @@ func check_if_table_exists(source_creds vcap.Credentials) { } } if len(not_existing) > 0 { - logging.Error.Println("An array of tables that does not exist in the database, but does exist in a manifest has been returned.") + logging.Error.Println("A list of tables that does not exist in the database, but does exist in a manifest has been returned.") logging.Error.Println("System exiting...") joined_tables := strings.Join(not_existing[:], " ") - logging.Error.Printf(joined_tables) - os.Exit(3) + logging.Error.Printf("DBMISSINGTABLES " + joined_tables) os.Exit(logging.DB_MISSING_TABLES) } else { logging.Logger.Printf("Manifest and Database tables appear to be in sync.") @@ -68,82 +67,8 @@ func check_if_table_exists(source_creds vcap.Credentials) { if err := scanner.Err(); err != nil { log.Fatal(err) } - - //for table := range table_to_schema { - // for table := range list_of_tables { - // //"SELECT schemaname, tablename FROM pg_tables WHERE schemaname = 'public'" - // query := fmt.Sprintf("SELECT EXISTS (SELECT FROM pg_tables WHERE schemaname = 'public' AND tablename = '%s')", table) - // rows, table_check := db.Query(query) - - // if table_check == nil { - // fmt.Printf(table + " exists") - // rows.Close() - // } else { - // fmt.Println(table + " does not exist") - // } - // } - // exists, err := db.Query(query) - // if err != nil { - // logging.Logger.Println("DBTOS3 could not get table names to check if it exists.") - // logging.Logger.Printf("DBTOS3 %s\n", err) - // os.Exit(logging.DB_SCHEMA_SCAN_FAILURE) - // } } -// https://stackoverflow.com/a/18479916 -// func readLines(path string) string { -// // file, err := os.Open(path) -// // if err != nil { -// // return nil, err -// // } -// // defer file.Close() - -// // var list_of_tables []string -// // scanner := bufio.NewScanner(file) -// // for scanner.Scan() { -// // list_of_tables = append(list_of_tables, scanner.Text()) -// // } -// // return list_of_tables, scanner.Err() - -// file, err := os.Open(path) -// if err != nil { -// log.Fatal(err) -// } -// defer file.Close() - -// scanner := bufio.NewScanner(file) -// for scanner.Scan() { -// I := scanner.Text() -// } - -// if err := scanner.Err(); err != nil { -// log.Fatal(err) -// } - -// } - -// https://stackoverflow.com/a/15323988 -// func stringInSlice(table string, list_of_tables []string) bool { -// logging.Logger.Println(reflect.DeepEqual(table_to_schema, list_of_tables)) -// eq := reflect.DeepEqual(table, list_of_tables) -// if eq { -// logging.Logger.Println("Database and Manifest appear to be in sync.") -// return true -// } else { -// logging.Logger.Println("Database and Manifest appear to differ.") -// return false -// } -// for _, i := range list_of_tables { -// if table == i { -// //logging.Logger.Printf(table + " exists in manifest and database.") -// //logging.Logger.Printf("table: " + table + " appears to be missing.") -// return true -// } -// } -// logging.Logger.Printf("table: " + table + " appears to be missing.") -// return false -// } - // checkDbCmd represents the checkDb command var checkDbCmd = &cobra.Command{ Use: "check_db", From 126262b9cb0d6a475842dd3cf01e82a187bc6a90 Mon Sep 17 00:00:00 2001 From: Alex Steel <130377221+asteel-gsa@users.noreply.github.com> Date: Fri, 14 Jun 2024 12:37:48 -0400 Subject: [PATCH 06/10] Embedded fs --- cmd/check_db.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cmd/check_db.go b/cmd/check_db.go index 96e72be..495d6f3 100644 --- a/cmd/check_db.go +++ b/cmd/check_db.go @@ -29,6 +29,11 @@ func check_if_table_exists(source_creds vcap.Credentials) { os.Exit(logging.DB_SCHEMA_SCAN_FAILURE) } + ////go:embed hello.txt + // var f embed.FS + // file, err := f.ReadFile("db_tables.txt") + // print(string(file)) + file, err := os.Open("db_tables.txt") if err != nil { log.Fatal(err) From 990bf63ee42df4b6d23a68b6d9e7e60872d10e92 Mon Sep 17 00:00:00 2001 From: Alex Steel <130377221+asteel-gsa@users.noreply.github.com> Date: Fri, 14 Jun 2024 13:49:50 -0400 Subject: [PATCH 07/10] Potential embed.FS method for table manifest --- db_tables.txt => cmd/assets/db_tables.txt | 0 cmd/check_db.go | 21 +++++++++++++-------- 2 files changed, 13 insertions(+), 8 deletions(-) rename db_tables.txt => cmd/assets/db_tables.txt (100%) diff --git a/db_tables.txt b/cmd/assets/db_tables.txt similarity index 100% rename from db_tables.txt rename to cmd/assets/db_tables.txt diff --git a/cmd/check_db.go b/cmd/check_db.go index 495d6f3..e3bbb51 100644 --- a/cmd/check_db.go +++ b/cmd/check_db.go @@ -6,6 +6,7 @@ package cmd import ( "bufio" "database/sql" + "embed" "fmt" "log" "os" @@ -18,10 +19,12 @@ import ( var ( source_database string + //go:embed assets/db_tables.txt + f embed.FS ) func check_if_table_exists(source_creds vcap.Credentials) { - //SELECT EXISTS (SELECT FROM pg_tables WHERE schemaname = 'schema_name' AND tablename = 'table_name'); + // SELECT EXISTS (SELECT FROM pg_tables WHERE schemaname = 'schema_name' AND tablename = 'table_name'); db, err := sql.Open("postgres", source_creds.Get("uri").String()) if err != nil { logging.Logger.Println("TABLECHECK could not connect to DB for checking table existance") @@ -29,18 +32,20 @@ func check_if_table_exists(source_creds vcap.Credentials) { os.Exit(logging.DB_SCHEMA_SCAN_FAILURE) } - ////go:embed hello.txt - // var f embed.FS - // file, err := f.ReadFile("db_tables.txt") - // print(string(file)) + // file, err := os.Open("db_tables.txt") + // if err != nil { + // log.Fatal(err) + // } + // defer file.Close() - file, err := os.Open("db_tables.txt") + file, err := f.ReadFile("assets/db_tables.txt") + //print(string(file)) if err != nil { log.Fatal(err) } - defer file.Close() - scanner := bufio.NewScanner(file) + scanner := bufio.NewScanner(strings.NewReader(string(file))) + //scanner := bufio.NewScanner(file) var not_existing []string for scanner.Scan() { //scanner.Text() From 17b189f00fc6a0f83207c57e60ada4dc55ed60f6 Mon Sep 17 00:00:00 2001 From: Alex Steel <130377221+asteel-gsa@users.noreply.github.com> Date: Mon, 17 Jun 2024 14:56:02 -0400 Subject: [PATCH 08/10] Add log - STATUS --- internal/logging/logging.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/logging/logging.go b/internal/logging/logging.go index 3de1de5..7edb81e 100644 --- a/internal/logging/logging.go +++ b/internal/logging/logging.go @@ -7,3 +7,4 @@ import ( var Logger = log.New(os.Stdout, "INFO: ", log.Ldate|log.Ltime) var Error = log.New(os.Stdout, "ERROR: ", log.Ldate|log.Ltime) +var Status = log.New(os.Stdout, "STATUS: ", log.Ldate|log.Ltime) From 723810937dcd83088e73621220fec6c19083d828 Mon Sep 17 00:00:00 2001 From: Alex Steel <130377221+asteel-gsa@users.noreply.github.com> Date: Mon, 17 Jun 2024 14:56:20 -0400 Subject: [PATCH 09/10] Change log output --- cmd/check_db.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/check_db.go b/cmd/check_db.go index e3bbb51..84a34b2 100644 --- a/cmd/check_db.go +++ b/cmd/check_db.go @@ -71,7 +71,7 @@ func check_if_table_exists(source_creds vcap.Credentials) { logging.Error.Printf("DBMISSINGTABLES " + joined_tables) os.Exit(logging.DB_MISSING_TABLES) } else { - logging.Logger.Printf("Manifest and Database tables appear to be in sync.") + logging.Status.Printf("Manifest and Database tables appear to be in sync.") } if err := scanner.Err(); err != nil { From ae53e16b4f7a5feb44e711a2796a4f28e3fc60c0 Mon Sep 17 00:00:00 2001 From: Alex Steel <130377221+asteel-gsa@users.noreply.github.com> Date: Mon, 17 Jun 2024 14:56:35 -0400 Subject: [PATCH 10/10] Check if table exists before entering restore loop --- cmd/s3_to_db.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/s3_to_db.go b/cmd/s3_to_db.go index d1e6694..b56ee3c 100644 --- a/cmd/s3_to_db.go +++ b/cmd/s3_to_db.go @@ -24,6 +24,7 @@ func bucket_to_local_tables( ) { table_to_schema := get_table_and_schema_names(db_creds) //fmt.Sprintf("%s%s/%s-%s.dump", s3path.Bucket, s3path.Key, schema, table) + check_if_table_exists(db_creds) for table, schema := range table_to_schema { dump_file_name := fmt.Sprintf("%s-%s.dump", schema, table)