diff --git a/vtl-sdmx/pom.xml b/vtl-sdmx/pom.xml index b29d4a844..880884985 100644 --- a/vtl-sdmx/pom.xml +++ b/vtl-sdmx/pom.xml @@ -35,7 +35,7 @@ io.sdmx fusion-sdmx-ml - 1.0.59-SNAPSHOT + 1.1.9-SNAPSHOT fr.insee.trevas diff --git a/vtl-sdmx/src/main/java/fr/insee/vtl/sdmx/TrevasSDMXUtils.java b/vtl-sdmx/src/main/java/fr/insee/vtl/sdmx/TrevasSDMXUtils.java index c6147f45e..9ac410447 100644 --- a/vtl-sdmx/src/main/java/fr/insee/vtl/sdmx/TrevasSDMXUtils.java +++ b/vtl-sdmx/src/main/java/fr/insee/vtl/sdmx/TrevasSDMXUtils.java @@ -15,6 +15,7 @@ import io.sdmx.utils.core.io.ReadableDataLocationTmp; import java.io.InputStream; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.stream.Collector; @@ -117,12 +118,14 @@ public static Structured.DataStructure buildStructureFromSDMX3(SdmxBeans beans, } public static Map dataflows(SdmxBeans sdmxBeans) { - return sdmxBeans.getDataflows().stream().collect(Collectors.toMap( - INamedBean::getId, - dataflowBean -> sdmxBeans.getDataStructures(dataflowBean.getDataStructureRef()) - .stream() - .collect(toSingleton()) - )); + return sdmxBeans.getDataflows().stream() + .map(df -> sdmxBeans.getDataStructures(df.getDataStructureRef())) + .distinct() + .flatMap(Collection::stream) + .collect(Collectors.toMap( + INamedBean::getId, + dataStructureBean -> dataStructureBean + )); } public static Map vtlMapping(SdmxBeans sdmxBeans) { diff --git a/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java b/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java index c01217942..936b24d4b 100644 --- a/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java +++ b/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java @@ -55,7 +55,7 @@ public void bpeV1() throws ScriptException { assertThat(bpeDetailDs.getDataStructure().size()).isEqualTo(6); ScriptContext context = engine.getContext(); - context.setAttribute("BPE_DETAIL", bpeDetailDs, ScriptContext.ENGINE_SCOPE); + context.setAttribute("BPE_DETAIL_VTL", bpeDetailDs, ScriptContext.ENGINE_SCOPE); // Step 1 engine.eval("" + @@ -63,16 +63,15 @@ public void bpeV1() throws ScriptException { " MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, \"[0-9]{5}|2[A-B][0-9]{3}\") errorcode \"Municipality code is not in the correct format\"\n" + "end datapoint ruleset;\n" + "\n" + - "CHECK_MUNICIPALITY := check_datapoint(BPE_DETAIL, UNIQUE_MUNICIPALITY invalid);"); + "CHECK_MUNICIPALITY := check_datapoint(BPE_DETAIL_VTL, UNIQUE_MUNICIPALITY invalid);"); Dataset checkMunicipality = (Dataset) engine.getContext().getAttribute("CHECK_MUNICIPALITY"); assertThat(checkMunicipality.getDataPoints()).isEmpty(); // Step 2 - engine.eval("BPE_DETAIL_CLEAN := BPE_DETAIL" + - " [drop LAMBERT_X, LAMBERT_Y]\n" + - " [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year];"); + engine.eval("BPE_DETAIL_CLEAN := BPE_DETAIL_VTL[drop LAMBERT_X, LAMBERT_Y]\n" + + "[rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year];"); Dataset bpeDetailClean = (Dataset) engine.getContext().getAttribute("BPE_DETAIL_CLEAN"); Structured.DataStructure bpeDetailCleanStructure = bpeDetailClean.getDataStructure(); @@ -90,8 +89,8 @@ public void bpeV1() throws ScriptException { assertThat(bpeDetailCleanStructure.get("year").getRole()).isEqualTo(Dataset.Role.ATTRIBUTE); // Step 3 - engine.eval("BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN" + - " [aggr nb := count(id) group by municipality, year, facility_type];"); + engine.eval("BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN[aggr nb := count(id) group by municipality, year, facility_type]" + + "[rename year to TIME_PERIOD];"); Dataset bpeMunicipality = (Dataset) engine.getContext().getAttribute("BPE_MUNICIPALITY"); Structured.DataStructure bpeMunicipalityStructure = bpeMunicipality.getDataStructure(); @@ -102,17 +101,16 @@ public void bpeV1() throws ScriptException { assertThat(bpeMunicipalityStructure.get("facility_type").getType()).isEqualTo(String.class); assertThat(bpeMunicipalityStructure.get("facility_type").getRole()).isEqualTo(Dataset.Role.IDENTIFIER); - assertThat(bpeMunicipalityStructure.get("year").getType()).isEqualTo(String.class); - assertThat(bpeMunicipalityStructure.get("year").getRole()).isEqualTo(Dataset.Role.IDENTIFIER); + assertThat(bpeMunicipalityStructure.get("TIME_PERIOD").getType()).isEqualTo(String.class); + assertThat(bpeMunicipalityStructure.get("TIME_PERIOD").getRole()).isEqualTo(Dataset.Role.IDENTIFIER); assertThat(bpeMunicipalityStructure.get("nb").getType()).isEqualTo(Long.class); assertThat(bpeMunicipalityStructure.get("nb").getRole()).isEqualTo(Dataset.Role.MEASURE); // Step 4 - engine.eval("BPE_NUTS3 <- BPE_MUNICIPALITY" + - " [calc nuts3 := if substr(municipality,1,2) = \"97\" then substr(municipality,1,3) else substr(municipality,1,2)] \n" + - " [aggr nb := count(nb) group by year, nuts3, facility_type];"); + engine.eval("BPE_NUTS3 <- BPE_MUNICIPALITY[calc nuts3 := if substr(municipality,1,2) = \"97\" then substr(municipality,1,3) else substr(municipality,1,2)]\n" + + "[aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type];"); Dataset bpeNuts = (Dataset) engine.getContext().getAttribute("BPE_NUTS3"); Structured.DataStructure bpeNutsStructure = bpeNuts.getDataStructure(); @@ -123,8 +121,8 @@ public void bpeV1() throws ScriptException { assertThat(bpeNutsStructure.get("facility_type").getType()).isEqualTo(String.class); assertThat(bpeNutsStructure.get("facility_type").getRole()).isEqualTo(Dataset.Role.IDENTIFIER); - assertThat(bpeNutsStructure.get("year").getType()).isEqualTo(String.class); - assertThat(bpeNutsStructure.get("year").getRole()).isEqualTo(Dataset.Role.IDENTIFIER); + assertThat(bpeNutsStructure.get("TIME_PERIOD").getType()).isEqualTo(String.class); + assertThat(bpeNutsStructure.get("TIME_PERIOD").getRole()).isEqualTo(Dataset.Role.IDENTIFIER); assertThat(bpeNutsStructure.get("nb").getType()).isEqualTo(Long.class); @@ -146,7 +144,7 @@ public void bpeV1() throws ScriptException { // Step 6 Structured.DataStructure censusStructure = TrevasSDMXUtils.buildStructureFromSDMX3("src/test/resources/DSD_BPE_CENSUS.xml", "LEGAL_POP"); - SparkDataset censusNuts = new SparkDataset( + SparkDataset legalPop = new SparkDataset( spark.read() .option("header", "true") .option("delimiter", ";") @@ -155,13 +153,12 @@ public void bpeV1() throws ScriptException { censusStructure ); - context.setAttribute("CENSUS_NUTS3_2021", censusNuts, ScriptContext.ENGINE_SCOPE); + context.setAttribute("LEGAL_POP", legalPop, ScriptContext.ENGINE_SCOPE); - engine.eval("CENSUS_NUTS3_2021 := CENSUS_NUTS3_2021 \n" + - " [rename REF_AREA to nuts3, TIME_PERIOD to year, POP_TOT to pop]\n" + - " [filter year = \"2021\"]\n" + - " [calc pop := cast(pop, integer)]" + - " [drop year, NB_COM, POP_MUNI];"); + engine.eval("CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop]\n" + + "[filter TIME_PERIOD = \"2021\"]\n" + + "[calc pop := cast(pop, integer)]\n" + + "[drop TIME_PERIOD, NB_COM, POP_MUNI];"); Dataset censusNuts2021 = (Dataset) engine.getContext().getAttribute("CENSUS_NUTS3_2021"); Structured.DataStructure censusNuts2021Structure = censusNuts2021.getDataStructure(); @@ -173,9 +170,8 @@ public void bpeV1() throws ScriptException { assertThat(censusNuts2021Structure.get("pop").getRole()).isEqualTo(Dataset.Role.MEASURE); // Step 7 - engine.eval("GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3" + - " [filter facility_type = \"D201\" and year = \"2021\"]\n" + - " [drop facility_type, year];"); + engine.eval("GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3[filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"]\n" + + "[drop facility_type, TIME_PERIOD];"); Dataset generalNuts = (Dataset) engine.getContext().getAttribute("GENERAL_PRACT_NUTS3_2021"); Structured.DataStructure generalNutsStructure = generalNuts.getDataStructure(); @@ -188,8 +184,8 @@ public void bpeV1() throws ScriptException { // Step 8 engine.eval("BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021)\n" + - " [calc pract_per_10000_inhabitants := nb / pop * 10000]\n" + - " [drop nb, pop];"); + "[calc pract_per_10000_inhabitants := nb / pop * 10000]\n" + + "[drop nb, pop];"); Dataset bpeCensus = (Dataset) engine.getContext().getAttribute("BPE_CENSUS_NUTS3_2021"); Structured.DataStructure bpeCensusStructure = bpeCensus.getDataStructure(); diff --git a/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java b/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java index af3dcfdb5..40f29c7fe 100644 --- a/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java +++ b/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java @@ -26,9 +26,15 @@ public class SDMXVTLWorkflowTest { @BeforeEach public void setUp() { + SparkSession.builder() + .appName("test") + .master("local") + .getOrCreate(); ScriptEngineManager mgr = new ScriptEngineManager(); engine = mgr.getEngineByExtension("vtl"); + + engine.put(VtlScriptEngine.PROCESSING_ENGINE_NAMES, "spark"); } @Disabled @@ -54,16 +60,6 @@ void testRefFromRepo() { @Test void testGetEmptyDataset() { - - SparkSession.builder() - .appName("test") - .master("local") - .getOrCreate(); - - ScriptEngineManager mgr = new ScriptEngineManager(); - ScriptEngine engine = mgr.getEngineByExtension("vtl"); - engine.put(VtlScriptEngine.PROCESSING_ENGINE_NAMES, "spark"); - ReadableDataLocation rdl = new ReadableDataLocationTmp("src/test/resources/DSD_BPE_CENSUS.xml"); SDMXVTLWorkflow sdmxVtlWorkflow = new SDMXVTLWorkflow(engine, rdl, Java8Helpers.mapOf()); Map emptyDatasets = sdmxVtlWorkflow.getEmptyDatasets(); @@ -84,13 +80,13 @@ void testGetEmptyDataset() { new Structured.DataStructure(Java8Helpers.listOf( new Structured.Component("facility_type", String.class, Dataset.Role.IDENTIFIER), new Structured.Component("municipality", String.class, Dataset.Role.IDENTIFIER), - new Structured.Component("year", String.class, Dataset.Role.IDENTIFIER), + new Structured.Component("TIME_PERIOD", String.class, Dataset.Role.IDENTIFIER), new Structured.Component("nb", Long.class, Dataset.Role.MEASURE) )) ); assertThat(result.get("BPE_NUTS3").getDataStructure()).isEqualTo( new Structured.DataStructure(Java8Helpers.listOf( - new Structured.Component("year", String.class, Dataset.Role.IDENTIFIER), + new Structured.Component("TIME_PERIOD", String.class, Dataset.Role.IDENTIFIER), new Structured.Component("facility_type", String.class, Dataset.Role.IDENTIFIER), new Structured.Component("nuts3", String.class, Dataset.Role.IDENTIFIER), new Structured.Component("nb", Long.class, Dataset.Role.MEASURE) @@ -103,8 +99,8 @@ public void testGetRulesetsVTL() { ReadableDataLocation rdl = new ReadableDataLocationTmp("src/test/resources/DSD_BPE_CENSUS.xml"); SDMXVTLWorkflow sdmxVtlWorkflow = new SDMXVTLWorkflow(engine, rdl, Java8Helpers.mapOf()); assertThat(sdmxVtlWorkflow.getRulesetsVTL()).isEqualTo( - "define datapoint ruleset UNIQUE_MUNICIPALITY (valuedomain CL_DEPCOM) is\n" + - " MUNICIPALITY_FORMAT_RULE : match_characters(CL_DEPCOM, \"[0-9]{5}|2[A-B][0-9]{3}\") errorcode \"Municipality code is not in the correct format\"\n" + + "define datapoint ruleset UNIQUE_MUNICIPALITY (variable DEPCOM) is\n" + + " MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, \"[0-9]{5}|2[A-B][0-9]{3}\") errorcode \"Municipality code is not in the correct format\"\n" + " end datapoint ruleset;\n" + "\n" + "define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is\n" + @@ -126,24 +122,24 @@ public void testGetTransformationsVTL() { " [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year];\n" + "\n" + "// BPE aggregation by municipality, type and year\n" + - "BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type];\n" + + "BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] [rename year to TIME_PERIOD];\n" + "\n" + "// BPE aggregation by NUTS 3, type and year\n" + "BPE_NUTS3 <- BPE_MUNICIPALITY [calc nuts3 := if substr(municipality,1,2) = \"97\" then substr(municipality,1,3) else substr(municipality,1,2)]\n" + - " [aggr nb := count(nb) group by year, nuts3, facility_type];\n" + + " [aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type];\n" + "\n" + "// BPE validation of facility types by NUTS 3\n" + "CHECK_NUTS3_TYPES := check_datapoint(BPE_NUTS3, NUTS3_TYPES invalid);\n" + "\n" + "// Prepare 2021 census dataset by NUTS 3\n" + - "CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, TIME_PERIOD to year, POP_TOT to pop]\n" + - " [filter year = \"2021\"]\n" + + "CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop]\n" + + " [filter TIME_PERIOD = \"2021\"]\n" + " [calc pop := cast(pop, integer)]\n" + - " [drop year, NB_COM, POP_MUNI];\n" + + " [drop TIME_PERIOD, NB_COM, POP_MUNI];\n" + "\n" + "// Extract dataset on general practitioners from BPE by NUTS 3 in 2021\n" + - "GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = \"D201\" and year = \"2021\"]\n" + - " [drop facility_type, year];\n" + + "GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"]\n" + + " [drop facility_type, TIME_PERIOD];\n" + "\n" + "// Merge practitioners and legal population datasets by NUTS 3 in 2021 and compute an indicator\n" + "BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021)\n" + diff --git a/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml b/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml index 219abf300..b16ece3e0 100644 --- a/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml +++ b/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml @@ -1,5 +1,5 @@ - + DSD_BPE_DETAIL_1049 false @@ -159,7 +159,7 @@ Cube populations légales @@ -218,21 +218,21 @@ urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0) - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR - - - - + position="2"> urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR + + + + @@ -261,21 +261,21 @@ urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0) - - urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR - - - - + position="2"> urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0) + + urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR + + + + @@ -321,9 +321,9 @@ Dataflow for BPE_DETAIL urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_DETAIL(1.0) - - Dataflow for BPE_CENSUS - urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:LEGAL_POP(1.0) + + Dataflow for LEGAL_POP_CUBE + urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:LEGAL_POP_CUBE(1.0) Dataflow for BPE_MUNICIPALITY @@ -339,12 +339,16 @@ - + VTL Mapping Scheme #1 VTL Mapping #1 urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=FR1:BPE_DETAIL(1.0) + + VTL Mapping #2 + urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=FR1:LEGAL_POP_CUBE(1.0) + @@ -353,8 +357,8 @@ Datapoint Ruleset UNIQUE_MUNICIPALITY - define datapoint ruleset UNIQUE_MUNICIPALITY (valuedomain CL_DEPCOM) is - MUNICIPALITY_FORMAT_RULE : match_characters(CL_DEPCOM, "[0-9]{5}|2[A-B][0-9]{3}") errorcode "Municipality code is not in the correct format" + define datapoint ruleset UNIQUE_MUNICIPALITY (variable DEPCOM) is + MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, "[0-9]{5}|2[A-B][0-9]{3}") errorcode "Municipality code is not in the correct format" end datapoint ruleset; @@ -371,7 +375,6 @@ Transformation Scheme for BPE - CENSUS - urn:sdmx:org.sdmx.infomodel.transformation.RulesetScheme=FR1:RS1(1.0) Step 1 Validation of municipality code in input file @@ -391,7 +394,7 @@ Step 3 BPE aggregation by municipality, type and year - BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] + BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] [rename year to TIME_PERIOD] BPE_MUNICIPALITY @@ -400,7 +403,7 @@ BPE aggregation by NUTS 3, type and year BPE_MUNICIPALITY [calc nuts3 := if substr(municipality,1,2) = "97" then substr(municipality,1,3) else substr(municipality,1,2)] - [aggr nb := count(nb) group by year, nuts3, facility_type] + [aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type] BPE_NUTS3 @@ -414,10 +417,10 @@ Step 6 Prepare 2021 census dataset by NUTS 3 - LEGAL_POP [rename REF_AREA to nuts3, TIME_PERIOD to year, POP_TOT to pop] - [filter year = "2021"] + LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop] + [filter TIME_PERIOD = "2021"] [calc pop := cast(pop, integer)] - [drop year, NB_COM, POP_MUNI] + [drop TIME_PERIOD, NB_COM, POP_MUNI] CENSUS_NUTS3_2021 @@ -425,8 +428,8 @@ Step 7 Extract dataset on general practitioners from BPE by NUTS 3 in 2021 - BPE_NUTS3 [filter facility_type = "D201" and year = "2021"] - [drop facility_type, year] + BPE_NUTS3 [filter facility_type = "D201" and TIME_PERIOD = "2021"] + [drop facility_type, TIME_PERIOD] GENERAL_PRACT_NUTS3_2021 @@ -440,6 +443,8 @@ BPE_CENSUS_NUTS3_2021 + urn:sdmx:org.sdmx.infomodel.transformation.VtlMappingScheme=FR1:VTLMS1(1.0) + urn:sdmx:org.sdmx.infomodel.transformation.RulesetScheme=FR1:RS1(1.0)