diff --git a/vtl-sdmx/pom.xml b/vtl-sdmx/pom.xml
index b29d4a844..880884985 100644
--- a/vtl-sdmx/pom.xml
+++ b/vtl-sdmx/pom.xml
@@ -35,7 +35,7 @@
io.sdmx
fusion-sdmx-ml
- 1.0.59-SNAPSHOT
+ 1.1.9-SNAPSHOT
fr.insee.trevas
diff --git a/vtl-sdmx/src/main/java/fr/insee/vtl/sdmx/TrevasSDMXUtils.java b/vtl-sdmx/src/main/java/fr/insee/vtl/sdmx/TrevasSDMXUtils.java
index c6147f45e..9ac410447 100644
--- a/vtl-sdmx/src/main/java/fr/insee/vtl/sdmx/TrevasSDMXUtils.java
+++ b/vtl-sdmx/src/main/java/fr/insee/vtl/sdmx/TrevasSDMXUtils.java
@@ -15,6 +15,7 @@
import io.sdmx.utils.core.io.ReadableDataLocationTmp;
import java.io.InputStream;
+import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.stream.Collector;
@@ -117,12 +118,14 @@ public static Structured.DataStructure buildStructureFromSDMX3(SdmxBeans beans,
}
public static Map dataflows(SdmxBeans sdmxBeans) {
- return sdmxBeans.getDataflows().stream().collect(Collectors.toMap(
- INamedBean::getId,
- dataflowBean -> sdmxBeans.getDataStructures(dataflowBean.getDataStructureRef())
- .stream()
- .collect(toSingleton())
- ));
+ return sdmxBeans.getDataflows().stream()
+ .map(df -> sdmxBeans.getDataStructures(df.getDataStructureRef()))
+ .distinct()
+ .flatMap(Collection::stream)
+ .collect(Collectors.toMap(
+ INamedBean::getId,
+ dataStructureBean -> dataStructureBean
+ ));
}
public static Map vtlMapping(SdmxBeans sdmxBeans) {
diff --git a/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java b/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java
index c01217942..936b24d4b 100644
--- a/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java
+++ b/vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java
@@ -55,7 +55,7 @@ public void bpeV1() throws ScriptException {
assertThat(bpeDetailDs.getDataStructure().size()).isEqualTo(6);
ScriptContext context = engine.getContext();
- context.setAttribute("BPE_DETAIL", bpeDetailDs, ScriptContext.ENGINE_SCOPE);
+ context.setAttribute("BPE_DETAIL_VTL", bpeDetailDs, ScriptContext.ENGINE_SCOPE);
// Step 1
engine.eval("" +
@@ -63,16 +63,15 @@ public void bpeV1() throws ScriptException {
" MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, \"[0-9]{5}|2[A-B][0-9]{3}\") errorcode \"Municipality code is not in the correct format\"\n" +
"end datapoint ruleset;\n" +
"\n" +
- "CHECK_MUNICIPALITY := check_datapoint(BPE_DETAIL, UNIQUE_MUNICIPALITY invalid);");
+ "CHECK_MUNICIPALITY := check_datapoint(BPE_DETAIL_VTL, UNIQUE_MUNICIPALITY invalid);");
Dataset checkMunicipality = (Dataset) engine.getContext().getAttribute("CHECK_MUNICIPALITY");
assertThat(checkMunicipality.getDataPoints()).isEmpty();
// Step 2
- engine.eval("BPE_DETAIL_CLEAN := BPE_DETAIL" +
- " [drop LAMBERT_X, LAMBERT_Y]\n" +
- " [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year];");
+ engine.eval("BPE_DETAIL_CLEAN := BPE_DETAIL_VTL[drop LAMBERT_X, LAMBERT_Y]\n" +
+ "[rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year];");
Dataset bpeDetailClean = (Dataset) engine.getContext().getAttribute("BPE_DETAIL_CLEAN");
Structured.DataStructure bpeDetailCleanStructure = bpeDetailClean.getDataStructure();
@@ -90,8 +89,8 @@ public void bpeV1() throws ScriptException {
assertThat(bpeDetailCleanStructure.get("year").getRole()).isEqualTo(Dataset.Role.ATTRIBUTE);
// Step 3
- engine.eval("BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN" +
- " [aggr nb := count(id) group by municipality, year, facility_type];");
+ engine.eval("BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN[aggr nb := count(id) group by municipality, year, facility_type]" +
+ "[rename year to TIME_PERIOD];");
Dataset bpeMunicipality = (Dataset) engine.getContext().getAttribute("BPE_MUNICIPALITY");
Structured.DataStructure bpeMunicipalityStructure = bpeMunicipality.getDataStructure();
@@ -102,17 +101,16 @@ public void bpeV1() throws ScriptException {
assertThat(bpeMunicipalityStructure.get("facility_type").getType()).isEqualTo(String.class);
assertThat(bpeMunicipalityStructure.get("facility_type").getRole()).isEqualTo(Dataset.Role.IDENTIFIER);
- assertThat(bpeMunicipalityStructure.get("year").getType()).isEqualTo(String.class);
- assertThat(bpeMunicipalityStructure.get("year").getRole()).isEqualTo(Dataset.Role.IDENTIFIER);
+ assertThat(bpeMunicipalityStructure.get("TIME_PERIOD").getType()).isEqualTo(String.class);
+ assertThat(bpeMunicipalityStructure.get("TIME_PERIOD").getRole()).isEqualTo(Dataset.Role.IDENTIFIER);
assertThat(bpeMunicipalityStructure.get("nb").getType()).isEqualTo(Long.class);
assertThat(bpeMunicipalityStructure.get("nb").getRole()).isEqualTo(Dataset.Role.MEASURE);
// Step 4
- engine.eval("BPE_NUTS3 <- BPE_MUNICIPALITY" +
- " [calc nuts3 := if substr(municipality,1,2) = \"97\" then substr(municipality,1,3) else substr(municipality,1,2)] \n" +
- " [aggr nb := count(nb) group by year, nuts3, facility_type];");
+ engine.eval("BPE_NUTS3 <- BPE_MUNICIPALITY[calc nuts3 := if substr(municipality,1,2) = \"97\" then substr(municipality,1,3) else substr(municipality,1,2)]\n" +
+ "[aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type];");
Dataset bpeNuts = (Dataset) engine.getContext().getAttribute("BPE_NUTS3");
Structured.DataStructure bpeNutsStructure = bpeNuts.getDataStructure();
@@ -123,8 +121,8 @@ public void bpeV1() throws ScriptException {
assertThat(bpeNutsStructure.get("facility_type").getType()).isEqualTo(String.class);
assertThat(bpeNutsStructure.get("facility_type").getRole()).isEqualTo(Dataset.Role.IDENTIFIER);
- assertThat(bpeNutsStructure.get("year").getType()).isEqualTo(String.class);
- assertThat(bpeNutsStructure.get("year").getRole()).isEqualTo(Dataset.Role.IDENTIFIER);
+ assertThat(bpeNutsStructure.get("TIME_PERIOD").getType()).isEqualTo(String.class);
+ assertThat(bpeNutsStructure.get("TIME_PERIOD").getRole()).isEqualTo(Dataset.Role.IDENTIFIER);
assertThat(bpeNutsStructure.get("nb").getType()).isEqualTo(Long.class);
@@ -146,7 +144,7 @@ public void bpeV1() throws ScriptException {
// Step 6
Structured.DataStructure censusStructure = TrevasSDMXUtils.buildStructureFromSDMX3("src/test/resources/DSD_BPE_CENSUS.xml", "LEGAL_POP");
- SparkDataset censusNuts = new SparkDataset(
+ SparkDataset legalPop = new SparkDataset(
spark.read()
.option("header", "true")
.option("delimiter", ";")
@@ -155,13 +153,12 @@ public void bpeV1() throws ScriptException {
censusStructure
);
- context.setAttribute("CENSUS_NUTS3_2021", censusNuts, ScriptContext.ENGINE_SCOPE);
+ context.setAttribute("LEGAL_POP", legalPop, ScriptContext.ENGINE_SCOPE);
- engine.eval("CENSUS_NUTS3_2021 := CENSUS_NUTS3_2021 \n" +
- " [rename REF_AREA to nuts3, TIME_PERIOD to year, POP_TOT to pop]\n" +
- " [filter year = \"2021\"]\n" +
- " [calc pop := cast(pop, integer)]" +
- " [drop year, NB_COM, POP_MUNI];");
+ engine.eval("CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop]\n" +
+ "[filter TIME_PERIOD = \"2021\"]\n" +
+ "[calc pop := cast(pop, integer)]\n" +
+ "[drop TIME_PERIOD, NB_COM, POP_MUNI];");
Dataset censusNuts2021 = (Dataset) engine.getContext().getAttribute("CENSUS_NUTS3_2021");
Structured.DataStructure censusNuts2021Structure = censusNuts2021.getDataStructure();
@@ -173,9 +170,8 @@ public void bpeV1() throws ScriptException {
assertThat(censusNuts2021Structure.get("pop").getRole()).isEqualTo(Dataset.Role.MEASURE);
// Step 7
- engine.eval("GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3" +
- " [filter facility_type = \"D201\" and year = \"2021\"]\n" +
- " [drop facility_type, year];");
+ engine.eval("GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3[filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"]\n" +
+ "[drop facility_type, TIME_PERIOD];");
Dataset generalNuts = (Dataset) engine.getContext().getAttribute("GENERAL_PRACT_NUTS3_2021");
Structured.DataStructure generalNutsStructure = generalNuts.getDataStructure();
@@ -188,8 +184,8 @@ public void bpeV1() throws ScriptException {
// Step 8
engine.eval("BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021)\n" +
- " [calc pract_per_10000_inhabitants := nb / pop * 10000]\n" +
- " [drop nb, pop];");
+ "[calc pract_per_10000_inhabitants := nb / pop * 10000]\n" +
+ "[drop nb, pop];");
Dataset bpeCensus = (Dataset) engine.getContext().getAttribute("BPE_CENSUS_NUTS3_2021");
Structured.DataStructure bpeCensusStructure = bpeCensus.getDataStructure();
diff --git a/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java b/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java
index af3dcfdb5..40f29c7fe 100644
--- a/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java
+++ b/vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java
@@ -26,9 +26,15 @@ public class SDMXVTLWorkflowTest {
@BeforeEach
public void setUp() {
+ SparkSession.builder()
+ .appName("test")
+ .master("local")
+ .getOrCreate();
ScriptEngineManager mgr = new ScriptEngineManager();
engine = mgr.getEngineByExtension("vtl");
+
+ engine.put(VtlScriptEngine.PROCESSING_ENGINE_NAMES, "spark");
}
@Disabled
@@ -54,16 +60,6 @@ void testRefFromRepo() {
@Test
void testGetEmptyDataset() {
-
- SparkSession.builder()
- .appName("test")
- .master("local")
- .getOrCreate();
-
- ScriptEngineManager mgr = new ScriptEngineManager();
- ScriptEngine engine = mgr.getEngineByExtension("vtl");
- engine.put(VtlScriptEngine.PROCESSING_ENGINE_NAMES, "spark");
-
ReadableDataLocation rdl = new ReadableDataLocationTmp("src/test/resources/DSD_BPE_CENSUS.xml");
SDMXVTLWorkflow sdmxVtlWorkflow = new SDMXVTLWorkflow(engine, rdl, Java8Helpers.mapOf());
Map emptyDatasets = sdmxVtlWorkflow.getEmptyDatasets();
@@ -84,13 +80,13 @@ void testGetEmptyDataset() {
new Structured.DataStructure(Java8Helpers.listOf(
new Structured.Component("facility_type", String.class, Dataset.Role.IDENTIFIER),
new Structured.Component("municipality", String.class, Dataset.Role.IDENTIFIER),
- new Structured.Component("year", String.class, Dataset.Role.IDENTIFIER),
+ new Structured.Component("TIME_PERIOD", String.class, Dataset.Role.IDENTIFIER),
new Structured.Component("nb", Long.class, Dataset.Role.MEASURE)
))
);
assertThat(result.get("BPE_NUTS3").getDataStructure()).isEqualTo(
new Structured.DataStructure(Java8Helpers.listOf(
- new Structured.Component("year", String.class, Dataset.Role.IDENTIFIER),
+ new Structured.Component("TIME_PERIOD", String.class, Dataset.Role.IDENTIFIER),
new Structured.Component("facility_type", String.class, Dataset.Role.IDENTIFIER),
new Structured.Component("nuts3", String.class, Dataset.Role.IDENTIFIER),
new Structured.Component("nb", Long.class, Dataset.Role.MEASURE)
@@ -103,8 +99,8 @@ public void testGetRulesetsVTL() {
ReadableDataLocation rdl = new ReadableDataLocationTmp("src/test/resources/DSD_BPE_CENSUS.xml");
SDMXVTLWorkflow sdmxVtlWorkflow = new SDMXVTLWorkflow(engine, rdl, Java8Helpers.mapOf());
assertThat(sdmxVtlWorkflow.getRulesetsVTL()).isEqualTo(
- "define datapoint ruleset UNIQUE_MUNICIPALITY (valuedomain CL_DEPCOM) is\n" +
- " MUNICIPALITY_FORMAT_RULE : match_characters(CL_DEPCOM, \"[0-9]{5}|2[A-B][0-9]{3}\") errorcode \"Municipality code is not in the correct format\"\n" +
+ "define datapoint ruleset UNIQUE_MUNICIPALITY (variable DEPCOM) is\n" +
+ " MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, \"[0-9]{5}|2[A-B][0-9]{3}\") errorcode \"Municipality code is not in the correct format\"\n" +
" end datapoint ruleset;\n" +
"\n" +
"define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is\n" +
@@ -126,24 +122,24 @@ public void testGetTransformationsVTL() {
" [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year];\n" +
"\n" +
"// BPE aggregation by municipality, type and year\n" +
- "BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type];\n" +
+ "BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] [rename year to TIME_PERIOD];\n" +
"\n" +
"// BPE aggregation by NUTS 3, type and year\n" +
"BPE_NUTS3 <- BPE_MUNICIPALITY [calc nuts3 := if substr(municipality,1,2) = \"97\" then substr(municipality,1,3) else substr(municipality,1,2)]\n" +
- " [aggr nb := count(nb) group by year, nuts3, facility_type];\n" +
+ " [aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type];\n" +
"\n" +
"// BPE validation of facility types by NUTS 3\n" +
"CHECK_NUTS3_TYPES := check_datapoint(BPE_NUTS3, NUTS3_TYPES invalid);\n" +
"\n" +
"// Prepare 2021 census dataset by NUTS 3\n" +
- "CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, TIME_PERIOD to year, POP_TOT to pop]\n" +
- " [filter year = \"2021\"]\n" +
+ "CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop]\n" +
+ " [filter TIME_PERIOD = \"2021\"]\n" +
" [calc pop := cast(pop, integer)]\n" +
- " [drop year, NB_COM, POP_MUNI];\n" +
+ " [drop TIME_PERIOD, NB_COM, POP_MUNI];\n" +
"\n" +
"// Extract dataset on general practitioners from BPE by NUTS 3 in 2021\n" +
- "GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = \"D201\" and year = \"2021\"]\n" +
- " [drop facility_type, year];\n" +
+ "GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"]\n" +
+ " [drop facility_type, TIME_PERIOD];\n" +
"\n" +
"// Merge practitioners and legal population datasets by NUTS 3 in 2021 and compute an indicator\n" +
"BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021)\n" +
diff --git a/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml b/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml
index 219abf300..b16ece3e0 100644
--- a/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml
+++ b/vtl-sdmx/src/test/resources/DSD_BPE_CENSUS.xml
@@ -1,5 +1,5 @@
-
+
DSD_BPE_DETAIL_1049
false
@@ -159,7 +159,7 @@
Cube populations légales
@@ -218,21 +218,21 @@
urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_DEPCOM(1.0)
-
- urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR
-
-
-
-
+ position="2">
urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT
urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0)
+
+ urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR
+
+
+
+
@@ -261,21 +261,21 @@
urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_REF_AREA(1.0)
-
- urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR
-
-
-
-
+ position="2">
urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).TYPE_EQUIPEMENT
urn:sdmx:org.sdmx.infomodel.codelist.Codelist=FR1:CL_TYPEQU(1.0)
+
+ urn:sdmx:org.sdmx.infomodel.conceptscheme.Concept=FR1:INSEE_CONCEPTS(1.0).REF_YEAR
+
+
+
+
@@ -321,9 +321,9 @@
Dataflow for BPE_DETAIL
urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:BPE_DETAIL(1.0)
-
- Dataflow for BPE_CENSUS
- urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:LEGAL_POP(1.0)
+
+ Dataflow for LEGAL_POP_CUBE
+ urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FR1:LEGAL_POP_CUBE(1.0)
Dataflow for BPE_MUNICIPALITY
@@ -339,12 +339,16 @@
-
+
VTL Mapping Scheme #1
VTL Mapping #1
urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=FR1:BPE_DETAIL(1.0)
+
+ VTL Mapping #2
+ urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=FR1:LEGAL_POP_CUBE(1.0)
+
@@ -353,8 +357,8 @@
Datapoint Ruleset UNIQUE_MUNICIPALITY
- define datapoint ruleset UNIQUE_MUNICIPALITY (valuedomain CL_DEPCOM) is
- MUNICIPALITY_FORMAT_RULE : match_characters(CL_DEPCOM, "[0-9]{5}|2[A-B][0-9]{3}") errorcode "Municipality code is not in the correct format"
+ define datapoint ruleset UNIQUE_MUNICIPALITY (variable DEPCOM) is
+ MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, "[0-9]{5}|2[A-B][0-9]{3}") errorcode "Municipality code is not in the correct format"
end datapoint ruleset;
@@ -371,7 +375,6 @@
Transformation Scheme for BPE - CENSUS
- urn:sdmx:org.sdmx.infomodel.transformation.RulesetScheme=FR1:RS1(1.0)
Step 1
Validation of municipality code in input file
@@ -391,7 +394,7 @@
Step 3
BPE aggregation by municipality, type and year
- BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type]
+ BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] [rename year to TIME_PERIOD]
BPE_MUNICIPALITY
@@ -400,7 +403,7 @@
BPE aggregation by NUTS 3, type and year
BPE_MUNICIPALITY [calc nuts3 := if substr(municipality,1,2) = "97" then substr(municipality,1,3) else substr(municipality,1,2)]
- [aggr nb := count(nb) group by year, nuts3, facility_type]
+ [aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type]
BPE_NUTS3
@@ -414,10 +417,10 @@
Step 6
Prepare 2021 census dataset by NUTS 3
- LEGAL_POP [rename REF_AREA to nuts3, TIME_PERIOD to year, POP_TOT to pop]
- [filter year = "2021"]
+ LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop]
+ [filter TIME_PERIOD = "2021"]
[calc pop := cast(pop, integer)]
- [drop year, NB_COM, POP_MUNI]
+ [drop TIME_PERIOD, NB_COM, POP_MUNI]
CENSUS_NUTS3_2021
@@ -425,8 +428,8 @@
Step 7
Extract dataset on general practitioners from BPE by NUTS 3 in 2021
- BPE_NUTS3 [filter facility_type = "D201" and year = "2021"]
- [drop facility_type, year]
+ BPE_NUTS3 [filter facility_type = "D201" and TIME_PERIOD = "2021"]
+ [drop facility_type, TIME_PERIOD]
GENERAL_PRACT_NUTS3_2021
@@ -440,6 +443,8 @@
BPE_CENSUS_NUTS3_2021
+ urn:sdmx:org.sdmx.infomodel.transformation.VtlMappingScheme=FR1:VTLMS1(1.0)
+ urn:sdmx:org.sdmx.infomodel.transformation.RulesetScheme=FR1:RS1(1.0)