diff --git a/README.md b/README.md
index 31adbd1..e365fcf 100644
--- a/README.md
+++ b/README.md
@@ -1,105 +1,115 @@
-# go-backup
-
-## while developing/testing
-
-```
-go run main.go
-```
-
-## to build
-
-```
-./build.sh
-```
-
-## Usage: clone
-
-```
-gov.gsa.fac.cgov-util clone --source-db --destination-db
-```
-
-This command clones one DB to another by piping STDOUT from `pg_dump` into the STDIN of `psql`, with the correct connection/credential parameters for each command.
-
-When run localling (assuming `ENV` is set to `LOCAL`) it will read a `config.json` from the directory `$HOME/.fac/config.json` (or, from `config.json` in the same folder as the application). This file should look like a `VCAP_SERVICES` variable that would be encountered in the Cloud Foundry/cloud.gov environment.
-
-When run in the cloud.gov environment (where `ENV` is anything other than `LOCAL` or `TESTING`), it will look at `$VCAP_SERVICES`, look in the `aws-rds` key, and look up the DB credentials by the friendly name provided on the command line. By this, if your brokered DB is called `fac-db`, this will then populate the credentials (internally) with the brokered DB name, password, URI, etc. in order to correctly `pg_dump` from one and, using another set of credentials, stream the data into another.
-
-This does *not* guarantee a perfect backup. It *does* do a rapid snapshot at a moment in time, without requiring the application to write any files to the local filesystem within a container. (On cloud.gov, this is limited to ~6GB, which makes dumping and loading DBs difficult.)
-
-## Usage: bucket
-
-```
-gsa.gov.fac.cgov-util bucket --source-db --destination-bucket
-```
-
-Similar to above, but this pipes a `pg_dump` to `s3 copy`.
-
-For now, this writes to the key `s3:///backups/-.dump`
-
-This wants to be improved.
-
-The purpose here is to (again) dump a database to a storage location without touching the local (containerized) filesystem. It uses friendly names, again, to look up the credentials for both the RDS database and brokered S3 in order to stream a DB dump to S3. (In theory, S3 does multipart uploads, so you should end up with a single file, up to 5TB in size, for your dump.)
-
-When running locally, this assumes `minio` is running as a stand-in for S3, and is specified as a `user-specified` service in the (local, bogus) VCAP_SERVICES config.
-
-(An example `config.json` is in this repository, and a more complete file in `internal/vcap/vcap_test.go`).
-
-
-## Assumptions
-
-* The `ENV` var is set to `LOCAL` for local testing. i.e `export ENV="LOCAL"`
-* You have two Postgres containers running, one at port 5432, and another at 5431.
-
-You can change the local DB values in `config.yaml` to reflect your config.
-
-In a remote environment, the variable `VCAP_SERVICES` is referenced to extract values.
-
-## Minio on Windows
-- Open powershell as administrator to download the tool.
-- Move `C:\mc.exe` to the root of the project folder.
-```
-Invoke-WebRequest -Uri "https://dl.minio.io/client/mc/release/windows-amd64/mc.exe" -OutFile "C:\mc.exe"
-```
-
-## Adding a New Command
-- We utilize cobra-cli as a basis for creating commands in [cmd](./cmd/). It will generate a template file for use, prepoulated with necessary information to start building a command.
-- More information can be found at the [cobra-cli readme](https://github.com/spf13/cobra-cli/blob/main/README.md)
-```
-cobra-cli add
-```
-
-## Common Command Usage
-
-- Fetch the latest release when running on a cloud.gov environment:
-```
-# With Proxy:
-curl -x $https_proxy -L "https://github.com/GSA-TTS/fac-backup-utility/releases/download/vX.Y.ZZ/gov.gsa.fac.cgov-util-vX.Y.ZZ-linux-amd64.tar.gz" -O
-
-# Without Proxy
-curl -L "https://github.com/GSA-TTS/fac-backup-utility/releases/download/vX.Y.ZZ/gov.gsa.fac.cgov-util-vX.Y.ZZ-linux-amd64.tar.gz" -O
-
-tar -xvf gov.gsa.fac.cgov-util-vX.Y.ZZ-linux-amd64.tar.gz && rm gov.gsa.fac.cgov-util-vX.Y.ZZ-linux-amd64.tar.gz
-```
-
-- Install AWS CLI on cloud.gov instances:
-    - It is advised to not run this on a local machine, due to where aws will be installed and could potentially add conflicts. Please install AWS CLI on your local environment using the official methods provided by AWS for your OS.
-```
-./gov.gsa.fac.cgov-util install_aws
-```
-
-- Use AWS S3 Sync to sync the contents of two s3 buckets:
-```
-./gov.gsa.fac.cgov-util s3_sync --source_s3 s3:/// --dest_s3 s3:///
-```
-
-- Backup an Postgres instance to an s3 using psql .bin files:
-```
-./gov.gsa.fac.cgov-util s3_to_db --db --s3path s3:///path/to/store/
-```
-
-- Backup Postgres Tables to another Postgres instance:
-    - This requires a secondary postgres in your docker compose, with the expected `5431:5432` ports, while the primary runs on `5432:5432`. These can be changed if desired. Port changes (if applicable) are only required for local testing & development.
-    - When running on cloud.gov environements, the port and URI are contained in `$VCAP_SERVICES` for the `src_db` and `dest_db` and thus require no changes.
-```
-./gov.gsa.fac.cgov-util db_to_db --src_db --dest_db
-```
+# go-backup
+
+## while developing/testing
+
+```
+go run main.go
+```
+
+## to build
+
+```
+./build.sh
+```
+
+## Usage: clone
+
+```
+gov.gsa.fac.cgov-util clone --source-db <name> --destination-db <name>
+```
+
+This command clones one DB to another by piping STDOUT from `pg_dump` into the STDIN of `psql`, with the correct connection/credential parameters for each command.
+
+When run locally (assuming `ENV` is set to `LOCAL`), it will read a `config.json` from `$HOME/.fac/config.json` (or from `config.json` in the same folder as the application). This file should look like a `VCAP_SERVICES` variable that would be encountered in the Cloud Foundry/cloud.gov environment.
+
+When run in the cloud.gov environment (where `ENV` is anything other than `LOCAL` or `TESTING`), it will look at `$VCAP_SERVICES`, look in the `aws-rds` key, and look up the DB credentials by the friendly name provided on the command line. For example, if your brokered DB is called `fac-db`, the tool will populate the credentials (internally) with the brokered DB name, password, URI, etc., in order to correctly `pg_dump` from one and, using another set of credentials, stream the data into another.
+
+This does *not* guarantee a perfect backup. It *does* do a rapid snapshot at a moment in time, without requiring the application to write any files to the local filesystem within a container. (On cloud.gov, this is limited to ~6GB, which makes dumping and loading DBs difficult.)
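+
+Conceptually, the clone resembles the following sketch (the connection URIs are illustrative placeholders; the tool assembles the real ones from `config.json` or `VCAP_SERVICES`):
+
+```
+pg_dump --dbname "postgresql://user:pass@host:5432/source_db" \
+  | psql --dbname "postgresql://user:pass@host:5431/destination_db"
+```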
+
+## Usage: bucket
+
+```
+gov.gsa.fac.cgov-util bucket --source-db <name> --destination-bucket <name>
+```
+
+Similar to above, but this pipes a `pg_dump` to `s3 copy`.
+
+For now, this writes to the key `s3://<bucket>/backups/<db>-<timestamp>.dump`.
+
+This naming scheme could be improved.
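+
+As with clone, the stream is conceptually similar to this sketch (bucket, key, and URI are illustrative placeholders, not the literal commands the tool runs):
+
+```
+pg_dump --dbname "postgresql://user:pass@host:5432/source_db" --format=custom \
+  | aws s3 cp - "s3://<bucket>/backups/<db>-<timestamp>.dump"
+```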
+
+The purpose here is to (again) dump a database to a storage location without touching the local (containerized) filesystem. It uses friendly names, again, to look up the credentials for both the RDS database and the brokered S3 in order to stream a DB dump to S3. (In theory, S3 does multipart uploads, so you should end up with a single file, up to 5TB in size, for your dump.)
+
+When running locally, this assumes `minio` is running as a stand-in for S3, and is specified as a `user-provided` service in the (local, bogus) VCAP_SERVICES config.
+
+(An example `config.json` is in this repository, and a more complete file is in `internal/vcap/vcap_test.go`.)
+
+
+## Assumptions
+
+* The `ENV` var is set to `LOCAL` for local testing, e.g. `export ENV="LOCAL"`
+* You have two Postgres containers running, one at port 5432, and another at 5431.
+
+You can change the local DB values in `config.yaml` to reflect your config.
+
+In a remote environment, the variable `VCAP_SERVICES` is referenced to extract values.
+
+## Minio on Windows
+- Open PowerShell as administrator to download the tool.
+- Move `C:\mc.exe` to the root of the project folder.
+```
+Invoke-WebRequest -Uri "https://dl.minio.io/client/mc/release/windows-amd64/mc.exe" -OutFile "C:\mc.exe"
+```
+
+## Adding a New Command
+- We utilize cobra-cli as a basis for creating commands in [cmd](./cmd/). It will generate a template file, prepopulated with the necessary information to start building a command.
+- More information can be found at the [cobra-cli readme](https://github.com/spf13/cobra-cli/blob/main/README.md)
+```
+cobra-cli add <command-name>
+```
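+
+For instance, the `row_count` command added in this changeset could have been scaffolded with (assuming cobra-cli names the generated file after the command, i.e. `cmd/row_count.go`):
+
+```
+cobra-cli add row_count
+```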
+
+## Common Command Usage
+
+- Fetch the latest release when running on a cloud.gov environment:
+```
+# With Proxy:
+curl -x $https_proxy -L "https://github.com/GSA-TTS/fac-backup-utility/releases/download/vX.Y.ZZ/gov.gsa.fac.cgov-util-vX.Y.ZZ-linux-amd64.tar.gz" -O
+
+# Without Proxy:
+curl -L "https://github.com/GSA-TTS/fac-backup-utility/releases/download/vX.Y.ZZ/gov.gsa.fac.cgov-util-vX.Y.ZZ-linux-amd64.tar.gz" -O
+
+tar -xvf gov.gsa.fac.cgov-util-vX.Y.ZZ-linux-amd64.tar.gz && rm gov.gsa.fac.cgov-util-vX.Y.ZZ-linux-amd64.tar.gz
+```
+
+- Install the AWS CLI on cloud.gov instances:
+    - It is advised not to run this on a local machine, because of where `aws` will be installed; it could potentially create conflicts. Please install the AWS CLI on your local environment using the official methods provided by AWS for your OS.
+```
+./gov.gsa.fac.cgov-util install_aws
+```
+
+- Use AWS S3 Sync to sync the contents of two s3 buckets:
+```
+./gov.gsa.fac.cgov-util s3_sync --source_s3 s3://<source-bucket>/ --dest_s3 s3://<destination-bucket>/
+```
+
+- Restore a Postgres instance from s3 backups (psql `.bin` files):
+```
+./gov.gsa.fac.cgov-util s3_to_db --db <name> --s3path s3://<bucket>/path/to/store/
+```
+
+- Backup Postgres tables to another Postgres instance:
+    - This requires a secondary postgres in your docker compose (see the sketch below this command), with the expected `5431:5432` ports, while the primary runs on `5432:5432`. These can be changed if desired. Port changes (if applicable) are only required for local testing & development.
+    - When running on cloud.gov environments, the port and URI are contained in `$VCAP_SERVICES` for the `src_db` and `dest_db` and thus require no changes.
+```
+./gov.gsa.fac.cgov-util db_to_db --src_db <name> --dest_db <name> --operation <initial|backup|restore>
+```
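+
+A minimal way to stand up the secondary Postgres for local testing (container name, password, and image tag are illustrative):
+
+```
+docker run --name fac-dest-db -e POSTGRES_PASSWORD=postgres -p 5431:5432 -d postgres:15
+```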
cmd.MarkFlagRequired("src_db") - cmd.MarkFlagRequired("dest_db") - cmd.MarkFlagRequired("operation") - case "truncate": - fmt.Println("RUNNING TRUNCATE FLAGS") - cmd.Flags().StringVarP(&truncate_db, "db", "", "", "target database name") - cmd.Flags().StringSliceVarP(&truncate_truncate, "truncate", "", []string{}, "tables to truncate before load") - case "check_tables": - fmt.Println("RUNNING CHECK_TABLES FLAGS") - cmd.Flags().StringVarP(&source_database, "db", "", "", "database name") - cmd.MarkFlagRequired("source_database") - case "drop": - fmt.Println("RUNNING DROP FLAGS") - cmd.Flags().StringVarP(&target_db, "db", "", "", "target database name") - cmd.Flags().StringSliceVarP(&drop, "drop", "", []string{}, "tables to drop before backup") - default: - fmt.Printf("NO FLAGS PROCESSED") - } - -} - -// Execute adds all child commands to the root command and sets flags appropriately. -// This is called by main.main(). It only needs to happen once to the rootCmd. -func Execute() { - err := rootCmd.Execute() - if err != nil { - os.Exit(1) - } -} - -func init() { - -} +/* +Copyright © 2024 NAME HERE +*/ +package cmd + +import ( + "fmt" + "os" + + "github.com/spf13/cobra" +) + +var ( + // For S3 operations + // db string + // s3path string + // truncate []string + + // // For db-to-db operations + // source_db string + // destination_db string + + // rootCmd represents the base command when called without any subcommands + rootCmd = &cobra.Command{ + Use: "cgov-util", + Short: "A cgov multitool", + Long: ``, + Run: func(cmd *cobra.Command, args []string) { + fmt.Println(args) + }, + } +) + +func parseFlags(cmd_name string, cmd *cobra.Command) { + switch cmd_name { + case "s3_to_db": + fmt.Println("RUNNING S3_TO_DB FLAGS") + cmd.Flags().StringVarP(&s3_to_db_s3path, "s3path", "", "", "destination path") + cmd.Flags().StringVarP(&s3_to_db_db, "db", "", "", "target database name") + cmd.MarkFlagRequired("db") + cmd.MarkFlagRequired("s3path") + case "db_to_s3": + fmt.Println("RUNNING DB_TO_S3 FLAGS") + cmd.Flags().StringVarP(&db_to_s3_db, "db", "", "", "source database name") + cmd.Flags().StringVarP(&db_to_s3_s3path, "s3path", "", "", "destination path") + cmd.MarkFlagRequired("db") + cmd.MarkFlagRequired("s3path") + case "db_to_db": + fmt.Println("RUNNING DB_TO_DB FLAGS") + cmd.Flags().StringVarP(&source_db, "src_db", "", "", "source database name") + cmd.Flags().StringVarP(&dest_db, "dest_db", "", "", "destination database name") + cmd.Flags().StringVarP(&operation, "operation", "", "", "operation (initial/backup/restore)") + cmd.MarkFlagRequired("src_db") + cmd.MarkFlagRequired("dest_db") + cmd.MarkFlagRequired("operation") + case "truncate": + fmt.Println("RUNNING TRUNCATE FLAGS") + cmd.Flags().StringVarP(&truncate_db, "db", "", "", "target database name") + cmd.Flags().StringSliceVarP(&truncate_truncate, "truncate", "", []string{}, "tables to truncate before load") + case "check_tables": + fmt.Println("RUNNING CHECK_TABLES FLAGS") + cmd.Flags().StringVarP(&source_database, "db", "", "", "database name") + cmd.MarkFlagRequired("source_database") + case "drop": + fmt.Println("RUNNING DROP FLAGS") + cmd.Flags().StringVarP(&target_db, "db", "", "", "target database name") + cmd.Flags().StringSliceVarP(&drop, "drop", "", []string{}, "tables to drop before backup") + case "row_count": + fmt.Println("RUNNING ROW_COUNT FLAGS") + cmd.Flags().StringVarP(&row_count_db, "db", "", "", "database name") + cmd.MarkFlagRequired("row_count_db") + default: + fmt.Printf("NO FLAGS PROCESSED") + } + +} + +// Execute 
diff --git a/cmd/check_db.go b/cmd/check_db.go
index ae49fb9..eb66b7d 100644
--- a/cmd/check_db.go
+++ b/cmd/check_db.go
@@ -61,14 +61,14 @@ func check_if_table_exists(source_creds vcap.Credentials) {
 			// store all scanner.Text() into a map
 			// if map != nil
 			// hard exit
-			not_existing = append(not_existing, scanner.Text())
+			not_existing = append(not_existing, "Missing Table: "+scanner.Text())
 		}
 	}
 	if len(not_existing) > 0 {
 		logging.Error.Println("CHECKTABLESFAIL : A list of tables that does not exist in the database, but does exist in a manifest has been returned.")
 		logging.Error.Println("System exiting...")
-		joined_tables := strings.Join(not_existing[:], " ")
-		logging.Error.Printf("DBMISSINGTABLES " + joined_tables)
+		joined_tables := strings.Join(not_existing[:], "\n")
+		logging.Error.Printf("DBMISSINGTABLES \n" + joined_tables)
 		os.Exit(logging.DB_MISSING_TABLES)
 	} else {
 		logging.Status.Printf("CHECKTABLESPASS : Manifest and Database tables appear to be in sync for database: " + source_database)
@@ -82,8 +82,8 @@ func check_if_table_exists(source_creds vcap.Credentials) {
 // checkDbCmd represents the checkDb command
 var checkDbCmd = &cobra.Command{
 	Use:   "check_db",
-	Short: "A brief description of your command",
-	Long:  `A`,
+	Short: "Check the database against the manifest and determine if there are any missing tables.",
+	Long:  `Check the database against the manifest and determine if there are any missing tables.`,
 	Run: func(cmd *cobra.Command, args []string) {
 		db_creds := getDBCredentials(source_database)
 		//stringInSlice(table, list_of_tables)
diff --git a/cmd/root.go b/cmd/root.go
index 80ba1c1..9e3b452 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -1,85 +1,89 @@
-/*
-Copyright © 2024 NAME HERE
-*/
-package cmd
-
-import (
-	"fmt"
-	"os"
-
-	"github.com/spf13/cobra"
-)
-
-var (
-	// For S3 operations
-	// db string
-	// s3path string
-	// truncate []string
-
-	// // For db-to-db operations
-	// source_db string
-	// destination_db string
-
-	// rootCmd represents the base command when called without any subcommands
-	rootCmd = &cobra.Command{
-		Use:   "cgov-util",
-		Short: "A cgov multitool",
-		Long:  ``,
-		Run: func(cmd *cobra.Command, args []string) {
-			fmt.Println(args)
-		},
-	}
-)
-
-func parseFlags(cmd_name string, cmd *cobra.Command) {
-	switch cmd_name {
-	case "s3_to_db":
-		fmt.Println("RUNNING S3_TO_DB FLAGS")
-		cmd.Flags().StringVarP(&s3_to_db_s3path, "s3path", "", "", "destination path")
-		cmd.Flags().StringVarP(&s3_to_db_db, "db", "", "", "target database name")
-		cmd.MarkFlagRequired("db")
-		cmd.MarkFlagRequired("s3path")
-	case "db_to_s3":
-		fmt.Println("RUNNING DB_TO_S3 FLAGS")
-		cmd.Flags().StringVarP(&db_to_s3_db, "db", "", "", "source database name")
-		cmd.Flags().StringVarP(&db_to_s3_s3path, "s3path", "", "", "destination path")
-		cmd.MarkFlagRequired("db")
-		cmd.MarkFlagRequired("s3path")
-	case "db_to_db":
-		fmt.Println("RUNNING DB_TO_DB FLAGS")
-		cmd.Flags().StringVarP(&source_db, "src_db", "", "", "source database name")
-		cmd.Flags().StringVarP(&dest_db, "dest_db", "", "", "destination database name")
-		cmd.Flags().StringVarP(&operation, "operation", "", "", "operation (initial/backup/restore)")
-		cmd.MarkFlagRequired("src_db")
-		cmd.MarkFlagRequired("dest_db")
-		cmd.MarkFlagRequired("operation")
-	case "truncate":
-		fmt.Println("RUNNING TRUNCATE FLAGS")
-		cmd.Flags().StringVarP(&truncate_db, "db", "", "", "target database name")
-		cmd.Flags().StringSliceVarP(&truncate_truncate, "truncate", "", []string{}, "tables to truncate before load")
-	case "check_tables":
-		fmt.Println("RUNNING CHECK_TABLES FLAGS")
-		cmd.Flags().StringVarP(&source_database, "db", "", "", "database name")
-		cmd.MarkFlagRequired("source_database")
-	case "drop":
-		fmt.Println("RUNNING DROP FLAGS")
-		cmd.Flags().StringVarP(&target_db, "db", "", "", "target database name")
-		cmd.Flags().StringSliceVarP(&drop, "drop", "", []string{}, "tables to drop before backup")
-	default:
-		fmt.Printf("NO FLAGS PROCESSED")
-	}
-
-}
-
-// Execute adds all child commands to the root command and sets flags appropriately.
-// This is called by main.main(). It only needs to happen once to the rootCmd.
-func Execute() {
-	err := rootCmd.Execute()
-	if err != nil {
-		os.Exit(1)
-	}
-}
-
-func init() {
-
-}
+/*
+Copyright © 2024 NAME HERE
+*/
+package cmd
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/spf13/cobra"
+)
+
+var (
+	// For S3 operations
+	// db string
+	// s3path string
+	// truncate []string
+
+	// // For db-to-db operations
+	// source_db string
+	// destination_db string
+
+	// rootCmd represents the base command when called without any subcommands
+	rootCmd = &cobra.Command{
+		Use:   "cgov-util",
+		Short: "A cgov multitool",
+		Long:  ``,
+		Run: func(cmd *cobra.Command, args []string) {
+			fmt.Println(args)
+		},
+	}
+)
+
+func parseFlags(cmd_name string, cmd *cobra.Command) {
+	switch cmd_name {
+	case "s3_to_db":
+		fmt.Println("RUNNING S3_TO_DB FLAGS")
+		cmd.Flags().StringVarP(&s3_to_db_s3path, "s3path", "", "", "destination path")
+		cmd.Flags().StringVarP(&s3_to_db_db, "db", "", "", "target database name")
+		cmd.MarkFlagRequired("db")
+		cmd.MarkFlagRequired("s3path")
+	case "db_to_s3":
+		fmt.Println("RUNNING DB_TO_S3 FLAGS")
+		cmd.Flags().StringVarP(&db_to_s3_db, "db", "", "", "source database name")
+		cmd.Flags().StringVarP(&db_to_s3_s3path, "s3path", "", "", "destination path")
+		cmd.MarkFlagRequired("db")
+		cmd.MarkFlagRequired("s3path")
+	case "db_to_db":
+		fmt.Println("RUNNING DB_TO_DB FLAGS")
+		cmd.Flags().StringVarP(&source_db, "src_db", "", "", "source database name")
+		cmd.Flags().StringVarP(&dest_db, "dest_db", "", "", "destination database name")
+		cmd.Flags().StringVarP(&operation, "operation", "", "", "operation (initial/backup/restore)")
+		cmd.MarkFlagRequired("src_db")
+		cmd.MarkFlagRequired("dest_db")
+		cmd.MarkFlagRequired("operation")
+	case "truncate":
+		fmt.Println("RUNNING TRUNCATE FLAGS")
+		cmd.Flags().StringVarP(&truncate_db, "db", "", "", "target database name")
+		cmd.Flags().StringSliceVarP(&truncate_truncate, "truncate", "", []string{}, "tables to truncate before load")
+	case "check_tables":
+		fmt.Println("RUNNING CHECK_TABLES FLAGS")
+		cmd.Flags().StringVarP(&source_database, "db", "", "", "database name")
+		cmd.MarkFlagRequired("db") // mark required by flag name, not variable name
+	case "drop":
+		fmt.Println("RUNNING DROP FLAGS")
+		cmd.Flags().StringVarP(&target_db, "db", "", "", "target database name")
+		cmd.Flags().StringSliceVarP(&drop, "drop", "", []string{}, "tables to drop before backup")
+	case "row_count":
+		fmt.Println("RUNNING ROW_COUNT FLAGS")
+		cmd.Flags().StringVarP(&row_count_db, "db", "", "", "database name")
+		cmd.MarkFlagRequired("db") // mark required by flag name, not variable name
+	default:
+		fmt.Printf("NO FLAGS PROCESSED")
+	}
+
+}
+
+// Execute adds all child commands to the root command and sets flags appropriately.
+// This is called by main.main(). It only needs to happen once to the rootCmd.
+func Execute() {
+	err := rootCmd.Execute()
+	if err != nil {
+		os.Exit(1)
+	}
+}
+
+func init() {
+
+}
diff --git a/cmd/row_count.go b/cmd/row_count.go
new file mode 100644
index 0000000..04a280c
--- /dev/null
+++ b/cmd/row_count.go
@@ -0,0 +1,81 @@
+/*
+Copyright © 2024 NAME HERE
+*/
+package cmd
+
+import (
+	"bufio"
+	"database/sql"
+	"fmt"
+	"os"
+	"strconv"
+	"strings"
+
+	"github.com/spf13/cobra"
+	"gov.gsa.fac.cgov-util/internal/logging"
+	"gov.gsa.fac.cgov-util/internal/vcap"
+)
+
+var (
+	row_count_db string
+)
+
+func check_rows_in_db(source_creds vcap.Credentials) {
+	db, err := sql.Open("postgres", source_creds.Get("uri").String())
+	if err != nil {
+		logging.Logger.Println("ROWCOUNT could not connect to DB for checking row counts")
+		logging.Logger.Printf("ROWCOUNT %s\n", err)
+		os.Exit(logging.DB_SCHEMA_SCAN_FAILURE)
+	}
+	file, err := f.ReadFile("assets/db_tables.txt")
+	//print(string(file))
+	if err != nil {
+		logging.Error.Println(err)
+		os.Exit(logging.ROW_COUNT_ERROR)
+	}
+	scanner := bufio.NewScanner(strings.NewReader(string(file)))
+	var row_count_for_tables []string
+	for scanner.Scan() {
+		query := fmt.Sprintf("SELECT count(*) FROM %s;", scanner.Text())
+		rows, err := db.Query(query)
+		if err != nil {
+			logging.Error.Println(err)
+			os.Exit(logging.ROW_COUNT_ERROR)
+		}
+		var count int
+		for rows.Next() {
+			if err := rows.Scan(&count); err != nil {
+				rows.Close()
+				logging.Error.Println(err)
+				os.Exit(logging.ROW_COUNT_ERROR)
+			}
+		}
+		// Close the result set inside the loop; a deferred Close here would not
+		// run until the function returns, holding one result set open per table.
+		rows.Close()
+		// Output to stdout on each line for debugging purposes
+		// logging.Logger.Printf(fmt.Sprintf("Table: %s | Row Count: %d\n", scanner.Text(), count))
+		r := strconv.Itoa(count)
+		// Store in row_count_for_tables []string
+		row_count_for_tables = append(row_count_for_tables, "Table: "+scanner.Text()+" | Rows: "+r)
+	}
+	logging.Logger.Println("Row count for tables in manifest...")
+	joined_tables := strings.Join(row_count_for_tables[:], "\n")
+	logging.Logger.Printf("TABLEROWCOUNT\n" + joined_tables)
+	if err := scanner.Err(); err != nil {
+		logging.Error.Println(err)
+	}
+}
+
+// rowCountCmd represents the rowCount command
+var rowCountCmd = &cobra.Command{
+	Use:   "row_count",
+	Short: "Check the row count of each manifest table in the database",
+	Long:  `Check the row count of each manifest table in the database`,
+	Run: func(cmd *cobra.Command, args []string) {
+		db_creds := getDBCredentials(row_count_db)
+		check_rows_in_db(db_creds)
+	},
+}
+
+func init() {
+	rootCmd.AddCommand(rowCountCmd)
+	parseFlags("row_count", rowCountCmd)
+}
diff --git a/internal/logging/error_codes.go b/internal/logging/error_codes.go
index 7509d4e..b6668b9 100644
--- a/internal/logging/error_codes.go
+++ b/internal/logging/error_codes.go
@@ -1,14 +1,16 @@
-package logging
-
-const S3_PATH_PARSE_ERROR = -10
-
-const COULD_NOT_FIND_CREDENTIALS = -20
-
-const PIPE_FAILURE = -30
-
-const DB_SCHEMA_SCAN_FAILURE = -40
-const DB_TRUNCATE_ERROR = -41
-const DB_DROP_ERROR = -42
-
-const DB_MISSING_TABLES = -50
-const PROTECTED_DATABASE = -51
+package logging
+
+const S3_PATH_PARSE_ERROR = -10
+
+const COULD_NOT_FIND_CREDENTIALS = -20
+
+const PIPE_FAILURE = -30
+
+const DB_SCHEMA_SCAN_FAILURE = -40
+const DB_TRUNCATE_ERROR = -41
+const DB_DROP_ERROR = -42
+
+const DB_MISSING_TABLES = -50
+const PROTECTED_DATABASE = -51
+
+const ROW_COUNT_ERROR = -60