From 4881434e255b07430198d937324bb19a1105d8f7 Mon Sep 17 00:00:00 2001
From: AnkitCLI <ankit.yadav@cloudsufi.com>
Date: Fri, 22 Nov 2024 14:55:37 +0530
Subject: [PATCH] Deduplicate e2e scenarios

---
 .../DeduplicateErrorScenarios.feature         | 24 ++++++
 .../Deduplicate_RuntimeErrorScenarios.feature | 79 +++++++++++++++++++
 .../deduplicate/FileToDeduplicate.feature     | 47 +++++++++++
 .../e2e-test/features/joiner/Joiner.feature   |  2 +-
 .../resources/errorMessage.properties         |  1 +
 .../resources/pluginParameters.properties     |  2 +
 .../CSV_DEDUPLICATE_TEST7.Output.csv          |  2 +
 7 files changed, 156 insertions(+), 1 deletion(-)
 create mode 100644 core-plugins/src/e2e-test/features/deduplicate/Deduplicate_RuntimeErrorScenarios.feature
 create mode 100644 core-plugins/src/e2e-test/resources/testdata/expected_outputs/CSV_DEDUPLICATE_TEST7.Output.csv

diff --git a/core-plugins/src/e2e-test/features/deduplicate/DeduplicateErrorScenarios.feature b/core-plugins/src/e2e-test/features/deduplicate/DeduplicateErrorScenarios.feature
index a7782ab23..8a02a1fb8 100644
--- a/core-plugins/src/e2e-test/features/deduplicate/DeduplicateErrorScenarios.feature
+++ b/core-plugins/src/e2e-test/features/deduplicate/DeduplicateErrorScenarios.feature
@@ -49,3 +49,27 @@ Feature:Deduplicate - Verify Deduplicate Plugin Error scenarios
     Then Select Deduplicate plugin property: filterOperation field function with value: "deduplicateFilterFunctionMax"
     Then Click on the Validate button
     Then Verify that the Plugin Property: "filterOperation" is displaying an in-line error message: "errorMessageDeduplicateInvalidFunction"
+
+  @GCS_DEDUPLICATE_TEST
+  Scenario:Verify Deduplicate plugin error for FilterOperation field with invalid field name
+    Given Open Datafusion Project to configure pipeline
+    When Select plugin: "File" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Analytics"
+    When Select plugin: "Deduplicate" from the plugins list as: "Analytics"
+    Then Connect plugins: "File" and "Deduplicate" to establish connection
+    Then Navigate to the properties page of plugin: "File"
+    Then Enter input plugin property: "referenceName" with value: "FileReferenceName"
+    Then Enter input plugin property: "path" with value: "gcsDeduplicateTest"
+    Then Select dropdown plugin property: "format" with option value: "csv"
+    Then Click plugin property: "skipHeader"
+    Then Click on the Get Schema button
+    Then Verify the Output Schema matches the Expected Schema: "deduplicateOutputSchema"
+    Then Validate "File" plugin properties
+    Then Close the Plugin Properties page
+    Then Navigate to the properties page of plugin: "Deduplicate"
+    Then Select dropdown plugin property: "uniqueFields" with option value: "fname"
+    Then Press ESC key to close the unique fields dropdown
+    Then Enter Deduplicate plugin property: filterOperation field name with value: "deduplicateInvalidFieldName"
+    Then Select Deduplicate plugin property: filterOperation field function with value: "deduplicateFilterFunctionMax"
+    Then Click on the Validate button
+    Then Verify that the Plugin Property: "filterOperation" is displaying an in-line error message: "errorMessageDeduplicateInvalidFieldName"
diff --git a/core-plugins/src/e2e-test/features/deduplicate/Deduplicate_RuntimeErrorScenarios.feature b/core-plugins/src/e2e-test/features/deduplicate/Deduplicate_RuntimeErrorScenarios.feature
new file mode 100644
index 000000000..9e4b07e69
--- /dev/null
+++ b/core-plugins/src/e2e-test/features/deduplicate/Deduplicate_RuntimeErrorScenarios.feature
@@ -0,0 +1,79 @@
+@Deduplicate
+Feature:Deduplicate - Verify Deduplicate Plugin Runtime Error Scenarios
+
+  @GCS_DEDUPLICATE_TEST @FILE_SINK_TEST
+  Scenario:Verify the Pipeline Fails When the Unique Field Column is Empty
+    Given Open Datafusion Project to configure pipeline
+    When Select plugin: "File" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Analytics"
+    When Select plugin: "Deduplicate" from the plugins list as: "Analytics"
+    Then Connect plugins: "File" and "Deduplicate" to establish connection
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "File" from the plugins list as: "Sink"
+    Then Connect plugins: "Deduplicate" and "File2" to establish connection
+    Then Navigate to the properties page of plugin: "File"
+    Then Enter input plugin property: "referenceName" with value: "FileReferenceName"
+    Then Enter input plugin property: "path" with value: "gcsDeduplicateTest"
+    Then Select dropdown plugin property: "format" with option value: "csv"
+    Then Click plugin property: "skipHeader"
+    Then Click on the Get Schema button
+    Then Verify the Output Schema matches the Expected Schema: "deduplicateOutputSchema"
+    Then Validate "File" plugin properties
+    Then Close the Plugin Properties page
+    Then Navigate to the properties page of plugin: "Deduplicate"
+    Then Validate "Deduplicate" plugin properties
+    Then Close the Plugin Properties page
+    Then Navigate to the properties page of plugin: "File2"
+    Then Enter input plugin property: "referenceName" with value: "FileReferenceName"
+    Then Enter input plugin property: "path" with value: "fileSinkTargetBucket"
+    Then Replace input plugin property: "pathSuffix" with value: "yyyy-MM-dd-HH-mm-ss"
+    Then Select dropdown plugin property: "format" with option value: "csv"
+    Then Validate "File" plugin properties
+    Then Close the Plugin Properties page
+    Then Save the pipeline
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Failed"
+
+  @GCS_DEDUPLICATE_TEST @FILE_SINK_TEST
+  Scenario: To verify that pipeline fails from File to File using Deduplicate plugin with invalid partition and invalid unique field as macro argument
+    Given Open Datafusion Project to configure pipeline
+    When Select plugin: "File" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Analytics"
+    When Select plugin: "Deduplicate" from the plugins list as: "Analytics"
+    Then Connect plugins: "File" and "Deduplicate" to establish connection
+    Then Navigate to the properties page of plugin: "File"
+    Then Enter input plugin property: "referenceName" with value: "FileReferenceName"
+    Then Enter input plugin property: "path" with value: "gcsDeduplicateTest"
+    Then Select dropdown plugin property: "format" with option value: "csv"
+    Then Click plugin property: "skipHeader"
+    Then Click on the Get Schema button
+    Then Verify the Output Schema matches the Expected Schema: "deduplicateOutputSchema"
+    Then Validate "File" plugin properties
+    Then Close the Plugin Properties page
+    Then Navigate to the properties page of plugin: "Deduplicate"
+    Then Click on the Macro button of Property: "deduplicateUniqueFields" and set the value to: "deduplicateInvalidFieldName"
+    Then Click on the Macro button of Property: "deduplicateNumPartitions" and set the value to: "deduplicateInvalidNumberOfPartitions"
+    Then Validate "Deduplicate" plugin properties
+    Then Close the Plugin Properties page
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "File" from the plugins list as: "Sink"
+    Then Connect plugins: "Deduplicate" and "File2" to establish connection
+    Then Navigate to the properties page of plugin: "File2"
+    Then Enter input plugin property: "referenceName" with value: "FileReferenceName"
+    Then Enter input plugin property: "path" with value: "fileSinkTargetBucket"
+    Then Replace input plugin property: "pathSuffix" with value: "yyyy-MM-dd-HH-mm-ss"
+    Then Select dropdown plugin property: "format" with option value: "csv"
+    Then Validate "File" plugin properties
+    Then Close the Plugin Properties page
+    Then Save the pipeline
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Enter runtime argument value "deduplicateInvalidFieldName" for key "deduplicateInvalidFieldName"
+    Then Enter runtime argument value "deduplicateInvalidNumberOfPartitions" for key "deduplicateInvalidNumberOfPartitions"
+    Then Run the Pipeline in Runtime with runtime arguments
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Failed"
diff --git a/core-plugins/src/e2e-test/features/deduplicate/FileToDeduplicate.feature b/core-plugins/src/e2e-test/features/deduplicate/FileToDeduplicate.feature
index a6574ef84..48cfd612d 100644
--- a/core-plugins/src/e2e-test/features/deduplicate/FileToDeduplicate.feature
+++ b/core-plugins/src/e2e-test/features/deduplicate/FileToDeduplicate.feature
@@ -260,3 +260,50 @@ Feature: Deduplicate - Verification of Deduplicate pipeline with File as source
     Then Close the pipeline logs
     Then Validate OUT record count of deduplicate is equal to IN record count of sink
     Then Validate output file generated by file sink plugin "fileSinkTargetBucket" is equal to expected output file "deduplicateTest6OutputFile"
+
+  @GCS_DEDUPLICATE_TEST @FILE_SINK_TEST @Deduplicate_Required @ITN_TEST
+  Scenario: To verify data transfer from File source to File sink using Deduplicate Plugin with only Unique field
+    Given Open Datafusion Project to configure pipeline
+    When Select plugin: "File" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Analytics"
+    When Select plugin: "Deduplicate" from the plugins list as: "Analytics"
+    Then Connect plugins: "File" and "Deduplicate" to establish connection
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "File" from the plugins list as: "Sink"
+    Then Connect plugins: "Deduplicate" and "File2" to establish connection
+    Then Navigate to the properties page of plugin: "File"
+    Then Enter input plugin property: "referenceName" with value: "FileReferenceName"
+    Then Enter input plugin property: "path" with value: "gcsDeduplicateTest"
+    Then Select dropdown plugin property: "format" with option value: "csv"
+    Then Click plugin property: "skipHeader"
+    Then Click on the Get Schema button
+    Then Verify the Output Schema matches the Expected Schema: "deduplicateOutputSchema"
+    Then Validate "File" plugin properties
+    Then Close the Plugin Properties page
+    Then Navigate to the properties page of plugin: "Deduplicate"
+    Then Select dropdown plugin property: "uniqueFields" with option value: "fname"
+    Then Press ESC key to close the unique fields dropdown
+    Then Validate "Deduplicate" plugin properties
+    Then Close the Plugin Properties page
+    Then Navigate to the properties page of plugin: "File2"
+    Then Enter input plugin property: "referenceName" with value: "FileReferenceName"
+    Then Enter input plugin property: "path" with value: "fileSinkTargetBucket"
+    Then Replace input plugin property: "pathSuffix" with value: "yyyy-MM-dd-HH-mm-ss"
+    Then Select dropdown plugin property: "format" with option value: "csv"
+    Then Validate "File" plugin properties
+    Then Close the Plugin Properties page
+    Then Save the pipeline
+    Then Preview and run the pipeline
+    Then Wait till pipeline preview is in running state
+    Then Open and capture pipeline preview logs
+    Then Verify the preview run status of pipeline in the logs is "succeeded"
+    Then Close the pipeline logs
+    Then Close the preview
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Succeeded"
+    Then Close the pipeline logs
+    Then Validate OUT record count of deduplicate is equal to IN record count of sink
+    Then Validate output file generated by file sink plugin "fileSinkTargetBucket" is equal to expected output file "deduplicateTest7OutputFile"
diff --git a/core-plugins/src/e2e-test/features/joiner/Joiner.feature b/core-plugins/src/e2e-test/features/joiner/Joiner.feature
index 54974bdaa..220c59aa5 100644
--- a/core-plugins/src/e2e-test/features/joiner/Joiner.feature
+++ b/core-plugins/src/e2e-test/features/joiner/Joiner.feature
@@ -75,5 +75,5 @@ Feature: Joiner analytics - Verify File source data transfer using Joiner analyt
     Then Wait till pipeline is in running state
     Then Open and capture logs
     Then Verify the pipeline status is "Succeeded"
-    Then Verify the CSV Output File matches the Expected Output File: "joinerOutput" With Expected Partitions: "expectedJoinerOutputPartitions"
     Then Close the pipeline logs
+    Then Verify the CSV Output File matches the Expected Output File: "joinerOutput" With Expected Partitions: "expectedJoinerOutputPartitions"
diff --git a/core-plugins/src/e2e-test/resources/errorMessage.properties b/core-plugins/src/e2e-test/resources/errorMessage.properties
index f42abe51a..3b3c02fcd 100644
--- a/core-plugins/src/e2e-test/resources/errorMessage.properties
+++ b/core-plugins/src/e2e-test/resources/errorMessage.properties
@@ -23,3 +23,4 @@ errorMessageJoinerBasicJoinCondition=Join keys cannot be empty
 errorMessageJoinerAdvancedJoinCondition=A join condition must be specified.
 errorMessageJoinerInputLoadMemory=Advanced outer joins must specify an input to load in memory.
 errorMessageJoinerAdvancedJoinConditionType=Advanced join conditions can only be used when there are two inputs.
+errorMessageDeduplicateInvalidFieldName=Invalid filter MAX(abcd): Field 'abcd' does not exist in input schema
diff --git a/core-plugins/src/e2e-test/resources/pluginParameters.properties b/core-plugins/src/e2e-test/resources/pluginParameters.properties
index f89e75c42..a9c2ca992 100644
--- a/core-plugins/src/e2e-test/resources/pluginParameters.properties
+++ b/core-plugins/src/e2e-test/resources/pluginParameters.properties
@@ -180,6 +180,7 @@ deduplicateFilterFunctionLast=Last
 deduplicateFilterFunctionFirst=First
 deduplicateFieldName=fname
 deduplicateFilterOperation=cost:Max
+deduplicateInvalidFieldName=abcd
 deduplicateNumberOfPartitions=2
 deduplicateInvalidNumberOfPartitions=@#$%
 deduplicateFilterFieldName=cost
@@ -191,6 +192,7 @@ deduplicateTest3OutputFile=e2e-tests/expected_outputs/CSV_DEDUPLICATE_TEST3_Outp
 deduplicateMacroOutputFile=e2e-tests/expected_outputs/CSV_DEDUPLICATE_TEST4_Output.csv
 deduplicateTest5OutputFile=e2e-tests/expected_outputs/CSV_DEDUPLICATE_TEST5_Output.csv
 deduplicateTest6OutputFile=e2e-tests/expected_outputs/CSV_DEDUPLICATE_TEST6_Output.csv
+deduplicateTest7OutputFile=e2e-tests/expected_outputs/CSV_DEDUPLICATE_TEST7.Output.csv
 ## Deduplicate-PLUGIN-PROPERTIES-END
 
 ## GROUPBY-PLUGIN-PROPERTIES-START
diff --git a/core-plugins/src/e2e-test/resources/testdata/expected_outputs/CSV_DEDUPLICATE_TEST7.Output.csv b/core-plugins/src/e2e-test/resources/testdata/expected_outputs/CSV_DEDUPLICATE_TEST7.Output.csv
new file mode 100644
index 000000000..9c7e6eca5
--- /dev/null
+++ b/core-plugins/src/e2e-test/resources/testdata/expected_outputs/CSV_DEDUPLICATE_TEST7.Output.csv
@@ -0,0 +1,2 @@
+alice,smith,1.5,34567
+bob,smith,50.23,12345
\ No newline at end of file