diff --git a/core-plugins/src/e2e-test/features/deduplicate/FileToDeduplicate.feature b/core-plugins/src/e2e-test/features/deduplicate/FileToDeduplicate.feature
index 8f5835236..2a61e5a23 100644
--- a/core-plugins/src/e2e-test/features/deduplicate/FileToDeduplicate.feature
+++ b/core-plugins/src/e2e-test/features/deduplicate/FileToDeduplicate.feature
@@ -156,3 +156,62 @@ Feature: Deduplicate - Verification of Deduplicate pipeline with File as source
     Then Close the pipeline logs
     Then Validate OUT record count of deduplicate is equal to IN record count of sink
     Then Validate output file generated by file sink plugin "fileSinkTargetBucket" is equal to expected output file "deduplicateTest3OutputFile"
+
+  # The Last and First cases run the same steps; only the filter function key and the
+  # expected output file key differ, so both are driven from the Examples table below.
+  @GCS_DEDUPLICATE_TEST @FILE_SINK_TEST
+  Scenario Outline: To verify complete flow of data extract and transfer from File source to File sink using Deduplicate Plugin with <filterOption> filter option
+    Given Open Datafusion Project to configure pipeline
+    When Select plugin: "File" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Analytics"
+    When Select plugin: "Deduplicate" from the plugins list as: "Analytics"
+    Then Connect plugins: "File" and "Deduplicate" to establish connection
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "File" from the plugins list as: "Sink"
+    Then Connect plugins: "Deduplicate" and "File2" to establish connection
+    Then Navigate to the properties page of plugin: "File"
+    Then Enter input plugin property: "referenceName" with value: "FileReferenceName"
+    Then Enter input plugin property: "path" with value: "gcsDeduplicateTest"
+    Then Select dropdown plugin property: "format" with option value: "csv"
+    Then Click plugin property: "skipHeader"
+    Then Click on the Get Schema button
+    Then Verify the Output Schema matches the Expected Schema: "deduplicateOutputSchema"
+    Then Validate "File" plugin properties
+    Then Close the Plugin Properties page
+    Then Navigate to the properties page of plugin: "Deduplicate"
+    Then Enter Deduplicate plugin property: filterOperation field name with value: "deduplicateFieldName"
+    Then Select Deduplicate plugin property: filterOperation field function with value: "<filterFunction>"
+    Then Select dropdown plugin property: "uniqueFields" with option value: "fname"
+    Then Press ESC key to close the unique fields dropdown
+    Then Select dropdown plugin property: "uniqueFields" with option value: "lname"
+    Then Press ESC key to close the unique fields dropdown
+    Then Enter input plugin property: "deduplicateNumPartitions" with value: "deduplicateNumberOfPartitions"
+    Then Validate "Deduplicate" plugin properties
+    Then Close the Plugin Properties page
+    Then Navigate to the properties page of plugin: "File2"
+    Then Enter input plugin property: "referenceName" with value: "FileReferenceName"
+    Then Enter input plugin property: "path" with value: "fileSinkTargetBucket"
+    Then Replace input plugin property: "pathSuffix" with value: "yyyy-MM-dd-HH-mm-ss"
+    Then Select dropdown plugin property: "format" with option value: "csv"
+    Then Validate "File" plugin properties
+    Then Close the Plugin Properties page
+    Then Save the pipeline
+    Then Preview and run the pipeline
+    Then Wait till pipeline preview is in running state
+    Then Open and capture pipeline preview logs
+    Then Verify the preview run status of pipeline in the logs is "succeeded"
+    Then Close the pipeline logs
+    Then Close the preview
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Succeeded"
+    Then Close the pipeline logs
+    Then Validate OUT record count of deduplicate is equal to IN record count of sink
+    Then Validate output file generated by file sink plugin "fileSinkTargetBucket" is equal to expected output file "<expectedOutputFile>"
+
+    Examples:
+      | filterOption | filterFunction                 | expectedOutputFile         |
+      | Last         | deduplicateFilterFunctionLast  | deduplicateTest5OutputFile |
+      | First        | deduplicateFilterFunctionFirst | deduplicateTest6OutputFile |
diff --git a/core-plugins/src/e2e-test/resources/pluginParameters.properties b/core-plugins/src/e2e-test/resources/pluginParameters.properties
index 247f763b2..f9d3fb755 100644
--- a/core-plugins/src/e2e-test/resources/pluginParameters.properties
+++ b/core-plugins/src/e2e-test/resources/pluginParameters.properties
@@ -174,6 +174,8 @@ deduplicateFileCsvFile=testdata/file/CSV_DEDUP_TEST.csv
 deduplicateFilterFunctionMax=Max
 deduplicateFilterFunctionMin=Min
 deduplicateFilterFunctionAny=Any
+deduplicateFilterFunctionLast=Last
+deduplicateFilterFunctionFirst=First
 deduplicateFieldName=fname
 deduplicateFilterOperation=cost:Max
 deduplicateNumberOfPartitions=2
@@ -185,6 +187,8 @@ deduplicateTest1OutputFile=e2e-tests/expected_outputs/CSV_DEDUPLICATE_TEST1_Outp
 deduplicateTest2OutputFile=e2e-tests/expected_outputs/CSV_DEDUPLICATE_TEST2_Output.csv
 deduplicateTest3OutputFile=e2e-tests/expected_outputs/CSV_DEDUPLICATE_TEST3_Output.csv
 deduplicateMacroOutputFile=e2e-tests/expected_outputs/CSV_DEDUPLICATE_TEST4_Output.csv
+deduplicateTest5OutputFile=e2e-tests/expected_outputs/CSV_DEDUPLICATE_TEST5_Output.csv
+deduplicateTest6OutputFile=e2e-tests/expected_outputs/CSV_DEDUPLICATE_TEST6_Output.csv
 ## Deduplicate-PLUGIN-PROPERTIES-END
 
 ## GROUPBY-PLUGIN-PROPERTIES-START
diff --git a/core-plugins/src/e2e-test/resources/testdata/expected_outputs/CSV_DEDUPLICATE_TEST5_Output.csv b/core-plugins/src/e2e-test/resources/testdata/expected_outputs/CSV_DEDUPLICATE_TEST5_Output.csv
new file mode 100644
index 000000000..f6cf1b216
--- /dev/null
+++ b/core-plugins/src/e2e-test/resources/testdata/expected_outputs/CSV_DEDUPLICATE_TEST5_Output.csv
@@ -0,0 +1,4 @@
+alice,smith,30.21,56789
+bob,jones,30.64,23456
+alice,jones,500.93,67890
+bob,smith,0.5,45678
diff --git a/core-plugins/src/e2e-test/resources/testdata/expected_outputs/CSV_DEDUPLICATE_TEST6_Output.csv b/core-plugins/src/e2e-test/resources/testdata/expected_outputs/CSV_DEDUPLICATE_TEST6_Output.csv
new file mode 100644
index 000000000..694c5a0ed
--- /dev/null
+++ b/core-plugins/src/e2e-test/resources/testdata/expected_outputs/CSV_DEDUPLICATE_TEST6_Output.csv
@@ -0,0 +1,4 @@
+alice,smith,1.5,34567
+bob,jones,30.64,23456
+alice,jones,500.93,67890
+bob,smith,50.23,12345