diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 20d5c2fdf..93e465ae8 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -396,6 +396,7 @@ jobs:
       S2S_SRC_S3_SERVICE_URL: $(S2S_SRC_S3_SERVICE_URL)
       S2S_SRC_GCP_SERVICE_URL: $(S2S_SRC_GCP_SERVICE_URL)
       SHARE_SAS_URL: $(SHARE_SAS_URL)
+      TAMPER_PROOF_ENDPOINT: "https://testazcopy.confidential-ledger.azure.com"
       GOCOVERDIR: '$(System.DefaultWorkingDirectory)/coverage'
     condition: succeededOrFailed()
   # Smoke Tests Publishing
diff --git a/cmd/copy.go b/cmd/copy.go
index fc49d6c37..93718cdea 100644
--- a/cmd/copy.go
+++ b/cmd/copy.go
@@ -113,6 +113,7 @@ type rawCopyCmdArgs struct {
     noGuessMimeType          bool
     preserveLastModifiedTime bool
     putMd5                   bool
+    tamperProof              string
     md5ValidationOption      string
     CheckLength              bool
     deleteSnapshotsOption    string
@@ -642,6 +643,7 @@ func (raw rawCopyCmdArgs) cook() (CookedCopyCmdArgs, error) {
     }

     cooked.putMd5 = raw.putMd5
+    cooked.tamperProof = raw.tamperProof
     err = cooked.md5ValidationOption.Parse(raw.md5ValidationOption)
     if err != nil {
         return cooked, err
     }
@@ -834,6 +836,9 @@ func (raw rawCopyCmdArgs) cook() (CookedCopyCmdArgs, error) {
     if err = validateMd5Option(cooked.md5ValidationOption, cooked.FromTo); err != nil {
         return cooked, err
     }
+    if err = validateTamperProofOption(cooked.tamperProof, cooked.putMd5, cooked.FromTo); err != nil {
+        return cooked, err
+    }

     // Because of some of our defaults, these must live down here and can't be properly checked.
     // TODO: Remove the above checks where they can't be done.
@@ -1032,6 +1037,14 @@ func validateMd5Option(option common.HashValidationOption, fromTo common.FromTo)
     return nil
 }

+func validateTamperProofOption(tamperProof string, putMd5 bool, fromTo common.FromTo) error {
+    // tamper-proof records the MD5 hash, so put-md5 must also be set on uploads (downloads rely on check-md5, which is on by default).
+    if len(tamperProof) > 0 && !putMd5 && fromTo.IsUpload() {
+        return fmt.Errorf("tamper-proof is set, but put-md5 is not; uploads with tamper-proof require the put-md5 flag")
+    }
+    return nil
+}
+
 // Valid tag key and value characters include:
 // 1. Lowercase and uppercase letters (a-z, A-Z)
 // 2. Digits (0-9)
@@ -1153,6 +1166,7 @@ type CookedCopyCmdArgs struct {
     preserveLastModifiedTime bool
     deleteSnapshotsOption    common.DeleteSnapshotsOption
     putMd5                   bool
+    tamperProof              string
     md5ValidationOption      common.HashValidationOption
     CheckLength              bool
     // commandString hold the user given command which is logged to the Job log file
@@ -1529,6 +1543,7 @@ func (cca *CookedCopyCmdArgs) processCopyJobPartOrders() (err error) {
         NoGuessMimeType:          cca.noGuessMimeType,
         PreserveLastModifiedTime: cca.preserveLastModifiedTime,
         PutMd5:                   cca.putMd5,
+        TamperProof:              cca.tamperProof,
         MD5ValidationOption:      cca.md5ValidationOption,
         DeleteSnapshotsOption:    cca.deleteSnapshotsOption,
         // Setting tags when tags explicitly provided by the user through blob-tags flag
@@ -2085,6 +2100,7 @@ func init() {
     cpCmd.PersistentFlags().BoolVar(&raw.forceIfReadOnly, "force-if-read-only", false, "False by default. When overwriting an existing file on Windows or Azure Files, force the overwrite to work even if the existing file has its read-only attribute set")
     cpCmd.PersistentFlags().BoolVar(&raw.backupMode, common.BackupModeFlagName, false, "False by default. Activates Windows' SeBackupPrivilege for uploads, or SeRestorePrivilege for downloads, to allow AzCopy to see read all files, regardless of their file system permissions, and to restore all permissions. Requires that the account running AzCopy already has these permissions (e.g. has Administrator rights or is a member of the 'Backup Operators' group). All this flag does is activate privileges that the account already has.")
     cpCmd.PersistentFlags().BoolVar(&raw.putMd5, "put-md5", false, "Create an MD5 hash of each file, and save the hash as the Content-MD5 property of the destination blob or file. By default the hash is NOT created. Only available when uploading.")
+    cpCmd.PersistentFlags().StringVar(&raw.tamperProof, "tamper-proof", "", "Uploads/downloads the MD5 hash of each file to/from the specified tamper-proof storage.")
     cpCmd.PersistentFlags().StringVar(&raw.md5ValidationOption, "check-md5", common.DefaultHashValidationOption.String(), "Specifies how strictly MD5 hashes should be validated when downloading. Only available when downloading. Available options: NoCheck, LogOnly, FailIfDifferent, FailIfDifferentOrMissing (default 'FailIfDifferent').")
     cpCmd.PersistentFlags().StringVar(&raw.includeFileAttributes, "include-attributes", "", "(Windows only) Include files whose attributes match the attribute list. For example: A;S;R")
     cpCmd.PersistentFlags().StringVar(&raw.excludeFileAttributes, "exclude-attributes", "", "(Windows only) Exclude files whose attributes match the attribute list. For example: A;S;R")
diff --git a/cmd/sync.go b/cmd/sync.go
index 1ab15a5c7..4d6b02c65 100644
--- a/cmd/sync.go
+++ b/cmd/sync.go
@@ -69,6 +69,7 @@ type rawSyncCmdArgs struct {
     preserveSymlinks    bool
     backupMode          bool
     putMd5              bool
+    tamperProof         string
     md5ValidationOption string
     // this flag indicates the user agreement with respect to deleting the extra files at the destination
     // which do not exists at source. With this flag turned on/off, users will not be asked for permission.
@@ -301,6 +302,8 @@ func (raw *rawSyncCmdArgs) cook() (cookedSyncCmdArgs, error) {
         return cooked, err
     }

+    cooked.tamperProof = raw.tamperProof
+
     err = cooked.md5ValidationOption.Parse(raw.md5ValidationOption)
     if err != nil {
         return cooked, err
     }
@@ -412,6 +415,7 @@ type cookedSyncCmdArgs struct {
     preserveSMBInfo         bool
     preservePOSIXProperties bool
     putMd5                  bool
+    tamperProof             string
     md5ValidationOption     common.HashValidationOption
     blockSize               int64
     putBlobSize             int64
@@ -818,6 +822,7 @@ func init() {
     syncCmd.PersistentFlags().StringVar(&raw.deleteDestination, "delete-destination", "false", "Defines whether to delete extra files from the destination that are not present at the source. Could be set to true, false, or prompt. "+
         "If set to prompt, the user will be asked a question before scheduling files and blobs for deletion. (default 'false').")
     syncCmd.PersistentFlags().BoolVar(&raw.putMd5, "put-md5", false, "Create an MD5 hash of each file, and save the hash as the Content-MD5 property of the destination blob or file. (By default the hash is NOT created.) Only available when uploading.")
+    syncCmd.PersistentFlags().StringVar(&raw.tamperProof, "tamper-proof", "", "Writes the MD5 hash of each file to the specified tamper-proof storage.")
     syncCmd.PersistentFlags().StringVar(&raw.md5ValidationOption, "check-md5", common.DefaultHashValidationOption.String(), "Specifies how strictly MD5 hashes should be validated when downloading. This option is only available when downloading. Available values include: NoCheck, LogOnly, FailIfDifferent, FailIfDifferentOrMissing. (default 'FailIfDifferent').")
     syncCmd.PersistentFlags().BoolVar(&raw.s2sPreserveAccessTier, "s2s-preserve-access-tier", true, "Preserve access tier during service to service copy. "+
         "Please refer to [Azure Blob storage: hot, cool, and archive access tiers](https://docs.microsoft.com/azure/storage/blobs/storage-blob-storage-tiers) to ensure destination storage account supports setting access tier. "+
diff --git a/cmd/syncEnumerator.go b/cmd/syncEnumerator.go
index 8000d5850..0afbe57e8 100644
--- a/cmd/syncEnumerator.go
+++ b/cmd/syncEnumerator.go
@@ -155,6 +155,7 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s
         BlobAttributes: common.BlobTransferAttributes{
             PreserveLastModifiedTime: cca.preserveSMBInfo, // true by default for sync so that future syncs have this information available
             PutMd5:                   cca.putMd5,
+            TamperProof:              cca.tamperProof,
             MD5ValidationOption:      cca.md5ValidationOption,
             BlockSizeInBytes:         cca.blockSize,
             PutBlobSizeInBytes:       cca.putBlobSize,
diff --git a/common/rpc-models.go b/common/rpc-models.go
index 1e0901a70..b2fce3bb2 100644
--- a/common/rpc-models.go
+++ b/common/rpc-models.go
@@ -239,6 +239,7 @@ type BlobTransferAttributes struct {
     NoGuessMimeType          bool                 // represents user decision to interpret the content-encoding from source file
     PreserveLastModifiedTime bool                 // when downloading, tell engine to set file's timestamp to timestamp of blob
     PutMd5                   bool                 // when uploading, should we create and PUT Content-MD5 hashes
+    TamperProof              string               // when uploading/downloading, should we record/verify the hash in a tamper-proof ledger
     MD5ValidationOption      HashValidationOption // when downloading, how strictly should we validate MD5 hashes?
     BlockSizeInBytes         int64                // when uploading/downloading/copying, specify the size of each chunk
     PutBlobSizeInBytes       int64                // when uploading, specify the threshold to determine if the blob should be uploaded in a single PUT request
diff --git a/ste/JobPartPlan.go b/ste/JobPartPlan.go
index f4302d4eb..a36a0f8e0 100644
--- a/ste/JobPartPlan.go
+++ b/ste/JobPartPlan.go
@@ -13,7 +13,7 @@ import (
 // dataSchemaVersion defines the data schema version of JobPart order files supported by
 // current version of azcopy
 // To be Incremented every time when we release azcopy with changed dataSchema
-const DataSchemaVersion common.Version = 18
+const DataSchemaVersion common.Version = 19

 const (
     CustomHeaderMaxBytes = 256
@@ -302,6 +302,9 @@ type JobPartPlanDstBlob struct {
     // Controls uploading of MD5 hashes
     PutMd5 bool

+    // Specifies the tamper-proof storage endpoint used to record MD5 hashes
+    TamperProof string
+
     MetadataLength uint16
     Metadata       [MetadataMaxBytes]byte
diff --git a/ste/JobPartPlanFileName.go b/ste/JobPartPlanFileName.go
index c28704ebb..f858a3b08 100644
--- a/ste/JobPartPlanFileName.go
+++ b/ste/JobPartPlanFileName.go
@@ -194,6 +194,7 @@ func (jpfn JobPartPlanFileName) Create(order common.CopyJobPartOrderRequest) {
             ContentLanguageLength: uint16(len(order.BlobAttributes.ContentLanguage)),
             CacheControlLength:    uint16(len(order.BlobAttributes.CacheControl)),
             PutMd5:                order.BlobAttributes.PutMd5, // here because it relates to uploads (blob destination)
+            TamperProof:           order.BlobAttributes.TamperProof,
             BlockBlobTier:         order.BlobAttributes.BlockBlobTier,
             PageBlobTier:          order.BlobAttributes.PageBlobTier,
             MetadataLength:        uint16(len(order.BlobAttributes.Metadata)),
diff --git a/ste/ledgerHelper.go b/ste/ledgerHelper.go
new file mode 100644
index 000000000..3b018f87b
--- /dev/null
+++ b/ste/ledgerHelper.go
@@ -0,0 +1,309 @@
+package ste
+
+import (
+    "context"
+    "crypto/tls"
+    "crypto/x509"
+    "encoding/base64"
+    "encoding/json"
+    "fmt"
+    "hash"
+    "io"
+    "log"
+    "net/http"
+    "os/exec"
+    "regexp"
+    "strings"
+    "time"
+
+    "github.com/google/uuid"
+)
+
+// Contents represents the structure of the contents field in Entry
+type Contents struct {
+    Path string `json:"path"`
+    Hash string `json:"hash"`
+}
+
+// Entry represents the structure of each entry in the response
+type Entry struct {
+    Contents      string `json:"contents"`
+    SubLedgerID   string `json:"subLedgerId"`
+    TransactionID string `json:"transactionId"`
+}
+
+// Response represents the structure of the JSON ledger response
+type Response struct {
+    Entries []Entry `json:"entries"`
+    State   string  `json:"state"`
+}
+
+// HashResult holds the result of a hash download and comparison
+type HashResult struct {
+    Success bool
+    Message string
+}
+
+const (
+    apiVersion     = "0.1-preview"
+    identityURLFmt = "https://identity.confidential-ledger.core.azure.com/ledgerIdentity/%s"
+    ledgerURLFmt   = "%s/app/transactions?api-version=%s&subLedgerId=%s"
+)
+
+// Global regex for extracting storage account information
+var storageRegex = regexp.MustCompile(`https://([^.]+)\.blob\.core\.windows\.net/([^/]+)(?:/([^/]+))?`)
+
+// Create a reusable HTTP client
+var httpClient = &http.Client{}
+
+// getLedgerAccessToken gets the ledger access token via the Azure CLI
+func getLedgerAccessToken() (string, error) {
+    cmd := "az"
+    args := []string{"account", "get-access-token", "--resource", "https://confidential-ledger.azure.com"}
+    out, err := exec.Command(cmd, args...).Output()
+    if err != nil {
+        log.Printf("Failed to execute az account get-access-token: %v", err)
+        return "", err
+    }
+
+    var data map[string]interface{}
+    if err := json.Unmarshal(out, &data); err != nil {
+        log.Printf("Failed to unmarshal access token: %v", err)
+        return "", err
+    }
+
+    accessToken, ok := data["accessToken"].(string)
+    if !ok {
+        return "", fmt.Errorf("accessToken not found in the response")
+    }
+
+    return accessToken, nil
+}
+
+// getIdentityCertificate fetches the ledger identity TLS certificate
+func getIdentityCertificate(ledgerUrl string, client *http.Client) (string, error) {
+    parts := strings.Split(ledgerUrl, ".")
+    if len(parts) < 2 {
+        return "", fmt.Errorf("invalid URL format")
+    }
+    ledgerName := strings.TrimPrefix(parts[0], "https://")
+
+    identityURL := fmt.Sprintf(identityURLFmt, ledgerName)
+    response, err := client.Get(identityURL)
+    if err != nil {
+        log.Printf("Failed to fetch identity certificate: %v", err)
+        return "", err
+    }
+    defer response.Body.Close()
+
+    var result map[string]interface{}
+    if err := json.NewDecoder(response.Body).Decode(&result); err != nil {
+        log.Printf("Failed to decode identity certificate response: %v", err)
+        return "", err
+    }
+
+    ledgerTlsCertificate, ok := result["ledgerTlsCertificate"].(string)
+    if !ok {
+        return "", fmt.Errorf("ledgerTlsCertificate not found in response")
+    }
+
+    return ledgerTlsCertificate, nil
+}
+
+// getStorageAccount extracts the storage account, container, and subdirectory
+// from the storage location and joins them into a subledger ID
+func getStorageAccount(storageLocation string) (string, error) {
+    matches := storageRegex.FindStringSubmatch(storageLocation)
+    if len(matches) < 3 {
+        return "", fmt.Errorf("invalid storage location format")
+    }
+
+    storageAccount := matches[1]
+    container := matches[2]
+
+    newString := fmt.Sprintf("%s-%s", storageAccount, container)
+
+    if len(matches) >= 4 && matches[3] != "" {
+        subdirectory := matches[3]
+        if !strings.Contains(subdirectory, ".") {
+            newString = fmt.Sprintf("%s-%s", newString, subdirectory)
+        }
+    }
+
+    return newString, nil
+}
+
+// createHttpClient creates an HTTP client that trusts the provided identity certificate
+func createHttpClient(certPEM string) (*http.Client, error) {
+    certPool := x509.NewCertPool()
+    if !certPool.AppendCertsFromPEM([]byte(certPEM)) {
+        return nil, fmt.Errorf("failed to append certs from PEM")
+    }
+
+    return &http.Client{
+        Transport: &http.Transport{
+            TLSClientConfig: &tls.Config{
+                RootCAs: certPool,
+            },
+        },
+    }, nil
+}
+
+// setRequestHeaders sets the common request headers
+func setRequestHeaders(req *http.Request) error {
+    accessToken, err := getLedgerAccessToken()
+    if err != nil {
+        return err
+    }
+    req.Header.Set("Authorization", "Bearer "+accessToken)
+    req.Header.Set("Content-Type", "application/json")
+    req.Header.Set("x-ms-client-request-id", uuid.New().String())
+    return nil
+}
+
+// uploadHash uploads a hash to the ledger
+func uploadHash(md5Hasher hash.Hash, tamperProofLocation string, storageDestination string) error {
+    certPEM, err := getIdentityCertificate(tamperProofLocation, httpClient)
+    if err != nil {
+        return err
+    }
+
+    client, err := createHttpClient(certPEM)
+    if err != nil {
+        return err
+    }
+
+    storageAccount, err := getStorageAccount(storageDestination)
+    if err != nil {
+        return err
+    }
+
+    url := fmt.Sprintf(ledgerURLFmt, tamperProofLocation, apiVersion, storageAccount)
+    hashSum := md5Hasher.Sum(nil)
+    hashSumBase64 := base64.StdEncoding.EncodeToString(hashSum)
+
+    // The contents field is stored single-quoted; downloadHash swaps the quotes back before unmarshalling
+    var contentString = "{'path': '" + storageDestination + "', 'hash': '" + hashSumBase64 + "'}"
+
+    payload := map[string]interface{}{
+        "contents": contentString,
+    }
+
+    jsonData, err := json.Marshal(payload)
+    if err != nil {
+        return err
+    }
+
+    req, err := http.NewRequest("POST", url, strings.NewReader(string(jsonData)))
+    if err != nil {
+        return err
+    }
+
+    if err := setRequestHeaders(req); err != nil {
+        return err
+    }
+
+    response, err := client.Do(req)
+    if err != nil {
+        return err
+    }
+    defer response.Body.Close()
+
+    if response.StatusCode != http.StatusOK {
+        body, _ := io.ReadAll(response.Body)
+        return fmt.Errorf("upload failed: %s", string(body))
+    }
+
+    return nil
+}
+
+// downloadHash downloads the stored hash from the ledger and compares it with the recalculated one
+func downloadHash(comparison md5Comparer, tamperProofLocation string, storageSource string) (HashResult, error) {
+    certPEM, err := getIdentityCertificate(tamperProofLocation, httpClient)
+    if err != nil {
+        return HashResult{}, err
+    }
+
+    client, err := createHttpClient(certPEM)
+    if err != nil {
+        return HashResult{}, err
+    }
+
+    storageAccount, err := getStorageAccount(storageSource)
+    if err != nil {
+        return HashResult{}, err
+    }
+
+    url := fmt.Sprintf(ledgerURLFmt, tamperProofLocation, apiVersion, storageAccount)
+
+    // Note: this context also caps the long polling below at 30 seconds
+    ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+    defer cancel()
+
+    req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
+    if err != nil {
+        return HashResult{}, err
+    }
+
+    if err := setRequestHeaders(req); err != nil {
+        return HashResult{}, err
+    }
+
+    response, err := client.Do(req)
+    if err != nil {
+        return HashResult{}, err
+    }
+    defer response.Body.Close()
+
+    body, err := io.ReadAll(response.Body)
+    if err != nil {
+        return HashResult{}, err
+    }
+
+    var ledgerResponse Response
+    if err := json.Unmarshal(body, &ledgerResponse); err != nil {
+        return HashResult{}, err
+    }
+
+    // Handle long polling for state "Loading"
+    for ledgerResponse.State == "Loading" {
+        time.Sleep(5 * time.Second)
+
+        response, err := client.Do(req)
+        if err != nil {
+            return HashResult{}, err
+        }
+
+        body, err := io.ReadAll(response.Body)
+        response.Body.Close() // close inside the loop so bodies don't accumulate until the function returns
+        if err != nil {
+            return HashResult{}, err
+        }
+
+        if err := json.Unmarshal(body, &ledgerResponse); err != nil {
+            return HashResult{}, err
+        }
+    }
+
+    // Walk the entries newest-first and compare against the latest entry for this path
+    for i := len(ledgerResponse.Entries) - 1; i >= 0; i-- {
+        entry := ledgerResponse.Entries[i]
+        contentsJSON := strings.ReplaceAll(entry.Contents, "'", `"`)
+        var contents Contents
+        if err := json.Unmarshal([]byte(contentsJSON), &contents); err != nil {
+            continue
+        }
+
+        if contents.Path == storageSource {
+            hashSumBase64 := base64.StdEncoding.EncodeToString(comparison.expected)
+            logMessage := fmt.Sprintf("\n\nComparing hash for '%s' in tamper-proof storage.\n", storageSource)
+            if contents.Hash != hashSumBase64 {
+                logMessage += fmt.Sprintf("ACL Hash: %s does not match recalculated Hash: %s", contents.Hash, hashSumBase64)
+                fmt.Println(logMessage)
+                return HashResult{false, logMessage}, nil
+            }
+            logMessage += fmt.Sprintf("Recalculated Hash: %s matches Hash stored in ACL: %s\n", hashSumBase64, contents.Hash)
+            fmt.Println(logMessage)
+            return HashResult{true, logMessage}, nil
+        }
+    }
+
+    return HashResult{false, fmt.Sprintf("no hash entry found for '%s' in tamper-proof storage", storageSource)}, nil
+}
diff --git a/ste/mgr-JobPartMgr.go b/ste/mgr-JobPartMgr.go
index 62bf9f686..441fc1082 100644
--- a/ste/mgr-JobPartMgr.go
+++ b/ste/mgr-JobPartMgr.go
@@ -40,6 +40,7 @@ type IJobPartMgr interface {
     BlobTypeOverride() common.BlobType
     BlobTiers() (blockBlobTier common.BlockBlobTier, pageBlobTier common.PageBlobTier)
     ShouldPutMd5() bool
+    TamperProofLocation() string
     DeleteDestinationFileIfNecessary() bool
     SAS() (string, string)
     // CancelJob()
@@ -202,6 +203,9 @@ type jobPartMgr struct {
     // Additional data shared by all of this Job Part's transfers; initialized when this jobPartMgr is created
     putMd5 bool

+    // Tamper-proof storage endpoint shared by all of this Job Part's transfers; initialized when this jobPartMgr is created
+    tamperProof string
+
     deleteDestinationFileIfNecessary bool

     metadata common.Metadata
@@ -288,6 +292,7 @@ func (jpm *jobPartMgr) ScheduleTransfers(jobCtx context.Context) {
     }

     jpm.putMd5 = dstData.PutMd5
+    jpm.tamperProof = dstData.TamperProof
     jpm.blockBlobTier = dstData.BlockBlobTier
     jpm.pageBlobTier = dstData.PageBlobTier
     jpm.deleteDestinationFileIfNecessary = dstData.DeleteDestinationFileIfNecessary
@@ -572,6 +577,10 @@ func (jpm *jobPartMgr) ShouldPutMd5() bool {
     return jpm.putMd5
 }

+func (jpm *jobPartMgr) TamperProofLocation() string {
+    return jpm.tamperProof
+}
+
 func (jpm *jobPartMgr) DeleteDestinationFileIfNecessary() bool {
     return jpm.deleteDestinationFileIfNecessary
 }
diff --git a/ste/mgr-JobPartTransferMgr.go b/ste/mgr-JobPartTransferMgr.go
index c727f7bac..93514eedd 100644
--- a/ste/mgr-JobPartTransferMgr.go
+++ b/ste/mgr-JobPartTransferMgr.go
@@ -24,6 +24,7 @@ type IJobPartTransferMgr interface {
     LastModifiedTime() time.Time
     PreserveLastModifiedTime() (time.Time, bool)
     ShouldPutMd5() bool
+    TamperProofLocation() string
     DeleteDestinationFileIfNecessary() bool
     MD5ValidationOption() common.HashValidationOption
     BlobTypeOverride() common.BlobType
@@ -554,6 +555,10 @@ func (jptm *jobPartTransferMgr) ShouldPutMd5() bool {
     return jptm.jobPartMgr.ShouldPutMd5()
 }

+func (jptm *jobPartTransferMgr) TamperProofLocation() string {
+    return jptm.jobPartMgr.TamperProofLocation()
+}
+
 func (jptm *jobPartTransferMgr) DeleteDestinationFileIfNecessary() bool {
     return jptm.jobPartMgr.DeleteDestinationFileIfNecessary()
 }
diff --git a/ste/testJobPartTransferManager_test.go b/ste/testJobPartTransferManager_test.go
index 55d8fb32d..93366f153 100644
--- a/ste/testJobPartTransferManager_test.go
+++ b/ste/testJobPartTransferManager_test.go
@@ -125,6 +125,10 @@ func (t *testJobPartTransferManager) ShouldPutMd5() bool {
     panic("implement me")
 }

+func (t *testJobPartTransferManager) TamperProofLocation() string {
+    panic("implement me")
+}
+
 func (t *testJobPartTransferManager) MD5ValidationOption() common.HashValidationOption {
     panic("implement me")
 }
diff --git a/ste/xfer-anyToRemote-file.go b/ste/xfer-anyToRemote-file.go
index 299057535..a499c3b0c 100644
--- a/ste/xfer-anyToRemote-file.go
+++ b/ste/xfer-anyToRemote-file.go
@@ -25,8 +25,6 @@ import (
     "crypto/md5"
     "errors"
     "fmt"
-    "github.com/Azure/azure-sdk-for-go/sdk/azcore"
-    "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"
     "hash"
     "net/http"
     "net/url"
@@ -34,6 +32,9 @@ import (
     "strings"
     "sync"

+    "github.com/Azure/azure-sdk-for-go/sdk/azcore"
+    "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"
+
     "github.com/Azure/azure-storage-azcopy/v10/common"
 )
@@ -368,6 +369,7 @@ func anyToRemote_file(jptm IJobPartTransferMgr, info *TransferInfo, pacer pacer,
     // Step 6: Go through the file and schedule chunk messages to send each chunk
     scheduleSendChunks(jptm, info.Source, srcFile, srcSize, s, sourceFileFactory, srcInfoProvider)
+
 }

 var jobCancelledLocalPrefetchErr = errors.New("job was cancelled; Pre-fetching stopped")
@@ -488,6 +490,12 @@ func scheduleSendChunks(jptm IJobPartTransferMgr, srcPath string, srcFile common
         if srcInfoProvider.IsLocal() && safeToUseHash {
             md5Channel <- md5Hasher.Sum(nil)
         }
+
+        // Upload the MD5 hash to tamper-proof storage; fail the transfer if the hash cannot be recorded
+        if len(jptm.TamperProofLocation()) > 0 {
+            if err := uploadHash(md5Hasher, jptm.TamperProofLocation(), jptm.Info().Destination); err != nil {
+                jptm.FailActiveUpload("Uploading MD5 hash to tamper-proof storage", err)
+            }
+        }
     }

     // Make reader for this chunk.
diff --git a/ste/xfer-remoteToLocal-file.go b/ste/xfer-remoteToLocal-file.go
index dfb791945..933b52df3 100644
--- a/ste/xfer-remoteToLocal-file.go
+++ b/ste/xfer-remoteToLocal-file.go
@@ -409,6 +409,23 @@ func epilogueWithCleanupDownload(jptm IJobPartTransferMgr, dl downloader, active
         jptm.FailActiveDownload("Checking MD5 hash", err)
     }

+    // Download the MD5 hash from tamper-proof storage and compare it with the recalculated hash
+    if len(jptm.TamperProofLocation()) > 0 {
+        hashResult, err := downloadHash(comparison, jptm.TamperProofLocation(), jptm.Info().Source)
+
+        if err != nil {
+            // Handle unexpected errors during the download process
+            jptm.FailActiveDownload("Downloading ACL MD5 hash", err)
+        } else if hashResult.Success {
+            // Log successful hash comparison
+            jptm.LogAtLevelForCurrentTransfer(common.LogInfo, hashResult.Message)
+        } else {
+            // Handle the expected hash mismatch scenario
+            err := errors.New(hashResult.Message)
+            jptm.FailActiveDownload("Checking ACL MD5 hash", err)
+        }
+    }
+
     // check length if enabled (except for dev null and decompression case, where that's impossible)
     if info.DestLengthValidation && info.Destination != common.Dev_Null && !jptm.ShouldDecompress() {
         fi, err := common.OSStat(info.getDownloadPath())
diff --git a/testSuite/scripts/run.py b/testSuite/scripts/run.py
index 5fdd4e779..d4a254d78 100644
--- a/testSuite/scripts/run.py
+++ b/testSuite/scripts/run.py
@@ -7,6 +7,8 @@
 from test_blob_sync import *
 from test_service_to_service_copy import *
 from test_google_cloud_storage_copy import *
+from test_hash_download import *
+from test_hash_upload import *
 import glob, os
 import configparser
 import platform
@@ -93,6 +95,9 @@ def parse_config_file_set_env():
     os.environ['S3_TESTS_OFF'] = config['CREDENTIALS']['S3_TESTS_OFF']
     os.environ['GCP_TESTS_OFF'] = config['CREDENTIALS']['GCP_TESTS_OFF']

+    # set env var for tamper-proof endpoint
+    os.environ['TAMPER_PROOF_ENDPOINT'] = config['CREDENTIALS']['TAMPER_PROOF_ENDPOINT']
+
 def check_env_not_exist(key):
     if os.environ.get(key, '-1') == '-1':
         print('Environment variable: ' + key + ' not set.')
@@ -119,7 +124,8 @@ def init():
         check_env_not_exist('FILESYSTEM_URL') or check_env_not_exist('FILESYSTEM_SAS_URL') or \
         check_env_not_exist('ACCOUNT_NAME') or check_env_not_exist('ACCOUNT_KEY') or \
         check_env_not_exist('S2S_SRC_BLOB_ACCOUNT_SAS_URL') or check_env_not_exist('S2S_DST_BLOB_ACCOUNT_SAS_URL') \
-        or check_env_not_exist('S2S_SRC_FILE_ACCOUNT_SAS_URL') or check_env_not_exist('S2S_SRC_S3_SERVICE_URL') or check_env_not_exist('S2S_SRC_GCP_SERVICE_URL'):
+        or check_env_not_exist('S2S_SRC_FILE_ACCOUNT_SAS_URL') or check_env_not_exist('S2S_SRC_S3_SERVICE_URL') or \
+        check_env_not_exist('S2S_SRC_GCP_SERVICE_URL') or check_env_not_exist('TAMPER_PROOF_ENDPOINT'):
         parse_config_file_set_env()

     # Get the environment variables value
@@ -163,6 +169,8 @@ def init():
     # get the s2s copy dest account URLs
     s2s_dst_blob_account_url = get_env_logged('S2S_DST_BLOB_ACCOUNT_SAS_URL')

+    tamper_proof_endpoint = get_env_logged('TAMPER_PROOF_ENDPOINT')
+
     get_env_logged("ACCOUNT_NAME")
     # do NOT log ACCOUNT_KEY
@@ -181,7 +189,7 @@ def init():
         cleanup()
         if not util.initialize_test_suite(test_dir_path, container_sas, container_oauth, container_oauth_validate, share_sas_url, premium_container_sas,
-                                          filesystem_url, filesystem_sas_url, s2s_src_blob_account_url, s2s_src_file_account_url, s2s_src_s3_service_url, s2s_src_gcp_service_url, s2s_dst_blob_account_url, azcopy_exec_location, test_suite_exec_location):
+                                          filesystem_url, filesystem_sas_url, s2s_src_blob_account_url, s2s_src_file_account_url, s2s_src_s3_service_url, s2s_src_gcp_service_url, s2s_dst_blob_account_url, tamper_proof_endpoint, azcopy_exec_location, test_suite_exec_location):
             print("failed to initialize the test suite with given user input")
             return
         else:
@@ -200,15 +208,18 @@ def main():
     print("Smoke tests starting...")
     init()

-    test_class_to_run = [BlobPipingTests,
-                         Blob_Sync_User_Scenario,
-                         Block_Upload_User_Scenarios,
-                         Blob_Download_User_Scenario,
-                         PageBlob_Upload_User_Scenarios,
-                         BlobFs_Upload_ShareKey_User_Scenarios,
-                         BlobFs_Download_SharedKey_User_Scenarios,
-                         Service_2_Service_Copy_User_Scenario,
-                         Google_Cloud_Storage_Copy_User_Scenario]
+    test_class_to_run = [
+        # BlobPipingTests,
+        # Blob_Sync_User_Scenario,
+        # Block_Upload_User_Scenarios,
+        # Blob_Download_User_Scenario,
+        # PageBlob_Upload_User_Scenarios,
+        # BlobFs_Upload_ShareKey_User_Scenarios,
+        # BlobFs_Download_SharedKey_User_Scenarios,
+        # Service_2_Service_Copy_User_Scenario,
+        # Google_Cloud_Storage_Copy_User_Scenario,
+        Hash_Upload_User_Scenarios,
+        Hash_Download_User_Scenarios]

     suites_list = []
     loader = unittest.TestLoader()
diff --git a/testSuite/scripts/test_hash_download.py b/testSuite/scripts/test_hash_download.py
new file mode 100644
index 000000000..f192156d5
--- /dev/null
+++ b/testSuite/scripts/test_hash_download.py
@@ -0,0 +1,113 @@
+from collections import namedtuple
+import utility as util
+import unittest
+import json
+import time
+import os
+
+TIMESTAMP = int(time.time())
+
+class Hash_Download_User_Scenarios(unittest.TestCase):
+    # test_single_file_download verifies that a single file is downloaded successfully with the tamper-proof flag set.
+    def test_single_file_download(self):
+
+        # creating directory with 1 file in it.
+        num_files = 1
+        dir_name = f"download_dir_hash_single_file_{TIMESTAMP}"
+        dir_n_files_path = util.create_test_n_files(1024, num_files, dir_name)
+        tamper_proof = util.test_tamper_proof_endpoint
+
+        # upload using azcopy
+        result = util.Command("copy").add_arguments(dir_n_files_path).add_arguments(util.test_container_url) \
+            .add_flags("log-level", "info").add_flags("recursive", "true").add_flags("put-md5", "true"). \
+            add_flags("tamper-proof", tamper_proof).execute_azcopy_copy_command()
+        self.assertTrue(result)
+
+        # verify the uploaded blob
+        result = util.Command("testBlob").add_arguments(dir_n_files_path).add_arguments(util.test_container_url) \
+            .add_flags("is-object-dir", "true").execute_azcopy_verify()
+        self.assertTrue(result)
+
+        # downloading the uploaded blob to devnull
+        src = util.get_resource_sas(dir_name)
+        dst = os.devnull
+        result = util.Command("copy").add_arguments(src).add_arguments(dst).add_flags("log-level", "info") \
+            .add_flags("recursive", "true").add_flags("output-type","json") \
+            .add_flags("check-md5", "FailIfDifferentOrMissing").add_flags("tamper-proof", tamper_proof) \
+            .execute_azcopy_copy_command_get_output()
+        result = util.parseAzcopyOutput(result)
+        try:
+            # parse the JSON output
+            x = json.loads(result, object_hook=lambda d: namedtuple('X', d.keys())(*d.values()))
+        except:
+            self.fail('error parsing the output in JSON format')
+        # check results
+        self.assertEqual(x.TransfersCompleted, str(num_files))
+        self.assertEqual(x.TransfersFailed, "0")
+
+    # test_multiple_file_download verifies that multiple files are downloaded successfully with the tamper-proof flag set.
+    def test_multiple_file_download(self):
+
+        # creating directory with 3 files in it.
+        num_files = 3
+        dir_name = f"download_dir_hash_multiple_file_{TIMESTAMP}"
+        dir_n_files_path = util.create_test_n_files(1024, num_files, dir_name)
+        tamper_proof = util.test_tamper_proof_endpoint
+
+        # upload using azcopy
+        result = util.Command("copy").add_arguments(dir_n_files_path).add_arguments(util.test_container_url) \
+            .add_flags("log-level", "info").add_flags("recursive", "true").add_flags("put-md5", "true"). \
+            add_flags("tamper-proof", tamper_proof).execute_azcopy_copy_command()
+        self.assertTrue(result)
+
+        # verify the uploaded blob
+        result = util.Command("testBlob").add_arguments(dir_n_files_path).add_arguments(util.test_container_url).add_flags("is-object-dir", "true").execute_azcopy_verify()
+        self.assertTrue(result)
+
+        # downloading the uploaded blob to devnull
+        src = util.get_resource_sas(dir_name)
+        dst = os.devnull
+        result = util.Command("copy").add_arguments(src).add_arguments(dst).add_flags("log-level", "info").add_flags("recursive", "true").add_flags("output-type","json").add_flags("check-md5", "FailIfDifferentOrMissing").add_flags("tamper-proof", tamper_proof).execute_azcopy_copy_command_get_output()
+        result = util.parseAzcopyOutput(result)
+        try:
+            # parse the JSON output
+            x = json.loads(result, object_hook=lambda d: namedtuple('X', d.keys())(*d.values()))
+        except:
+            self.fail('error parsing the output in JSON format')
+        # check results
+        self.assertEqual(x.TransfersCompleted, str(num_files))
+        self.assertEqual(x.TransfersFailed, "0")
+
+    def test_download_hash_no_match(self):
+
+        dir_name = f"download_dir_hash_single_no_match_file_{TIMESTAMP}"
+        util.create_test_dir(dir_name)
+        tamper_proof = util.test_tamper_proof_endpoint
+
+        # Creating a directory with 1 file in it, uploading it with azcopy and then modifying it.
+        for i in range(1,2):
+            dir_n_files_path = util.create_test_file(f"{dir_name}/test.txt", 1024*i)
+            result = util.Command("copy").add_arguments(f"{util.test_directory_path}/{dir_name}").add_arguments(util.test_container_url) \
+                .add_flags("log-level", "info").add_flags("recursive", "true").add_flags("put-md5", "true"). \
+                add_flags("tamper-proof", tamper_proof).execute_azcopy_copy_command()
+            self.assertTrue(result)
+
+        # verify the uploaded blob
+        result = util.Command("testBlob").add_arguments(f"{util.test_directory_path}/{dir_name}").add_arguments(util.test_container_url).add_flags("is-object-dir", "true").execute_azcopy_verify()
+        self.assertTrue(result)
+
+        # downloading the uploaded blob to devnull
+        src = util.get_resource_sas(dir_name)
+        dst = os.devnull
+        result = util.Command("copy").add_arguments(src).add_arguments(dst).add_flags("log-level", "info").add_flags("recursive", "true").add_flags("output-type","json").add_flags("check-md5", "FailIfDifferentOrMissing").add_flags("tamper-proof", tamper_proof).execute_azcopy_copy_command_get_output()
+        result = util.parseAzcopyOutput(result)
+        try:
+            # parse the JSON output
+            x = json.loads(result, object_hook=lambda d: namedtuple('X', d.keys())(*d.values()))
+        except:
+            self.fail('error parsing the output in JSON format')
+        # expected to fail since the file was modified
+        self.assertEqual(x.TransfersCompleted, "0")
+        self.assertEqual(x.TransfersFailed, "1")
\ No newline at end of file
diff --git a/testSuite/scripts/test_hash_upload.py b/testSuite/scripts/test_hash_upload.py
new file mode 100644
index 000000000..445dc53cb
--- /dev/null
+++ b/testSuite/scripts/test_hash_upload.py
@@ -0,0 +1,42 @@
+import utility as util
+import unittest
+import time
+
+
+TIMESTAMP = int(time.time())
+
+class Hash_Upload_User_Scenarios(unittest.TestCase):
+
+    def test_single_file_upload(self):
+        # creating directory with 1 file in it.
+        dir_name = f"upload_dir_hash_single_file_{TIMESTAMP}"
+        dir_n_files_path = util.create_test_n_files(1024, 1, dir_name)
+        tamper_proof = util.test_tamper_proof_endpoint
+
+        # execute azcopy copy upload.
+        result = util.Command("copy").add_arguments(dir_n_files_path).add_arguments(util.test_container_url) \
+            .add_flags("log-level", "info").add_flags("recursive", "true").add_flags("put-md5", "true"). \
+            add_flags("tamper-proof", tamper_proof).execute_azcopy_copy_command()
+        self.assertTrue(result)
+
+        # verify the upload by calling the testBlob validator
+        result = util.Command("testBlob").add_arguments(dir_n_files_path).add_arguments(util.test_container_url).add_flags("is-object-dir", "true").execute_azcopy_verify()
+        self.assertTrue(result)
+
+    def test_multiple_file_upload(self):
+        # creating directory with multiple files in it.
+        dir_name = f"upload_dir_hash_multiple_file_{TIMESTAMP}"
+        dir_n_files_path = util.create_test_n_files(1024, 3, dir_name)
+        tamper_proof = util.test_tamper_proof_endpoint
+
+        # execute azcopy copy upload.
+        result = util.Command("copy").add_arguments(dir_n_files_path).add_arguments(util.test_container_url) \
+            .add_flags("log-level", "info").add_flags("recursive", "true").add_flags("put-md5", "true"). \
+            add_flags("tamper-proof", tamper_proof).execute_azcopy_copy_command()
+        self.assertTrue(result)
+
+        # verify the upload by calling the testBlob validator
+        result = util.Command("testBlob").add_arguments(dir_n_files_path).add_arguments(util.test_container_url).add_flags("is-object-dir", "true").execute_azcopy_verify()
+        self.assertTrue(result)
diff --git a/testSuite/scripts/utility.py b/testSuite/scripts/utility.py
index f708ac290..865efc33c 100644
--- a/testSuite/scripts/utility.py
+++ b/testSuite/scripts/utility.py
@@ -166,7 +166,7 @@ def clean_test_filesystem(fileSystemURLStr):
 # initialize_test_suite initializes the setup for executing test cases.
 def initialize_test_suite(test_dir_path, container_sas, container_oauth, container_oauth_validate, share_sas_url, premium_container_sas, filesystem_url, filesystem_sas_url,
-                          s2s_src_blob_account_url, s2s_src_file_account_url, s2s_src_s3_service_url, s2s_src_gcp_service_url, s2s_dst_blob_account_url, azcopy_exec_location, test_suite_exec_location):
+                          s2s_src_blob_account_url, s2s_src_file_account_url, s2s_src_s3_service_url, s2s_src_gcp_service_url, s2s_dst_blob_account_url, tamper_proof_endpoint, azcopy_exec_location, test_suite_exec_location):
     # test_directory_path is global variable holding the location of test directory to execute all the test cases.
     # contents are created, copied, uploaded and downloaded to and from this test directory only
     global test_directory_path
@@ -206,6 +206,9 @@ def initialize_test_suite(test_dir_path, container_sas, container_oauth, contain
     global test_s2s_src_s3_service_url
     global test_s2s_src_gcp_service_url

+    # holds the test tamper-proof endpoint
+    global test_tamper_proof_endpoint
+
     # creating a test_directory in the location given by user.
     # this directory will be used to created and download all the test files.
     new_dir_path = os.path.join(test_dir_path, "test_data")
@@ -254,6 +257,7 @@ def initialize_test_suite(test_dir_path, container_sas, container_oauth, contain
     test_s2s_src_s3_service_url = s2s_src_s3_service_url
     test_s2s_src_gcp_service_url = s2s_src_gcp_service_url
     test_share_url = share_sas_url
+    test_tamper_proof_endpoint = tamper_proof_endpoint

     if not clean_test_filesystem(test_bfs_account_url.rstrip("/").rstrip("\\")):  # rstrip because clean fails if trailing /
         print("failed to clean test filesystem.")
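
Note on the entry format used by ste/ledgerHelper.go above: uploadHash stores the ledger "contents" field as a single-quoted JSON-like string, and downloadHash swaps the quotes back before unmarshalling, which quietly assumes that stored paths and hashes never contain quote characters. Below is a minimal standalone sketch of that round-trip, not part of the change itself; the Contents type and quoting convention mirror the helper, the blob URL is a placeholder, and no ledger access is needed to run it.

package main

import (
    "crypto/md5"
    "encoding/base64"
    "encoding/json"
    "fmt"
    "strings"
)

// Contents mirrors the struct in ste/ledgerHelper.go: the per-file payload recorded in the ledger.
type Contents struct {
    Path string `json:"path"`
    Hash string `json:"hash"`
}

// encodeContents builds the single-quoted contents string the way uploadHash does.
func encodeContents(path string, md5Sum []byte) string {
    hashB64 := base64.StdEncoding.EncodeToString(md5Sum)
    return "{'path': '" + path + "', 'hash': '" + hashB64 + "'}"
}

// decodeContents reverses the convention the way downloadHash does:
// replace single quotes with double quotes, then unmarshal as JSON.
func decodeContents(contents string) (Contents, error) {
    var c Contents
    err := json.Unmarshal([]byte(strings.ReplaceAll(contents, "'", `"`)), &c)
    return c, err
}

func main() {
    sum := md5.Sum([]byte("example file body"))
    stored := encodeContents("https://account.blob.core.windows.net/container/file.txt", sum[:])
    fmt.Println("stored: ", stored)

    c, err := decodeContents(stored)
    if err != nil {
        panic(err)
    }
    fmt.Println("decoded:", c.Path, c.Hash)
}

Marshalling Contents directly to JSON on the write path would remove the quoting assumption entirely, at the cost of changing the entry format already stored in existing ledgers.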