Skip to content

Commit

Permalink
Merge pull request #375 from InseeFr/sdmx-ruleset-error
Browse files Browse the repository at this point in the history
Fix/SDMX-utils
  • Loading branch information
NicoLaval authored Jan 23, 2025
2 parents fb4dc62 + f1bfe6d commit 7344c7d
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 86 deletions.
2 changes: 1 addition & 1 deletion vtl-sdmx/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
<dependency>
<groupId>io.sdmx</groupId>
<artifactId>fusion-sdmx-ml</artifactId>
<version>1.0.59-SNAPSHOT</version>
<version>1.1.9-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>fr.insee.trevas</groupId>
Expand Down
15 changes: 9 additions & 6 deletions vtl-sdmx/src/main/java/fr/insee/vtl/sdmx/TrevasSDMXUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import io.sdmx.utils.core.io.ReadableDataLocationTmp;

import java.io.InputStream;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.stream.Collector;
Expand Down Expand Up @@ -117,12 +118,14 @@ public static Structured.DataStructure buildStructureFromSDMX3(SdmxBeans beans,
}

public static Map<String, DataStructureBean> dataflows(SdmxBeans sdmxBeans) {
return sdmxBeans.getDataflows().stream().collect(Collectors.toMap(
INamedBean::getId,
dataflowBean -> sdmxBeans.getDataStructures(dataflowBean.getDataStructureRef())
.stream()
.collect(toSingleton())
));
return sdmxBeans.getDataflows().stream()
.map(df -> sdmxBeans.getDataStructures(df.getDataStructureRef()))
.distinct()
.flatMap(Collection::stream)
.collect(Collectors.toMap(
INamedBean::getId,
dataStructureBean -> dataStructureBean
));
}

public static Map<String, DataStructureBean> vtlMapping(SdmxBeans sdmxBeans) {
Expand Down
48 changes: 22 additions & 26 deletions vtl-sdmx/src/test/java/fr/insee/vtl/BPETest.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,24 +55,23 @@ public void bpeV1() throws ScriptException {
assertThat(bpeDetailDs.getDataStructure().size()).isEqualTo(6);

ScriptContext context = engine.getContext();
context.setAttribute("BPE_DETAIL", bpeDetailDs, ScriptContext.ENGINE_SCOPE);
context.setAttribute("BPE_DETAIL_VTL", bpeDetailDs, ScriptContext.ENGINE_SCOPE);

// Step 1
engine.eval("" +
"define datapoint ruleset UNIQUE_MUNICIPALITY (variable DEPCOM) is\n" +
" MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, \"[0-9]{5}|2[A-B][0-9]{3}\") errorcode \"Municipality code is not in the correct format\"\n" +
"end datapoint ruleset;\n" +
"\n" +
"CHECK_MUNICIPALITY := check_datapoint(BPE_DETAIL, UNIQUE_MUNICIPALITY invalid);");
"CHECK_MUNICIPALITY := check_datapoint(BPE_DETAIL_VTL, UNIQUE_MUNICIPALITY invalid);");

Dataset checkMunicipality = (Dataset) engine.getContext().getAttribute("CHECK_MUNICIPALITY");

assertThat(checkMunicipality.getDataPoints()).isEmpty();

// Step 2
engine.eval("BPE_DETAIL_CLEAN := BPE_DETAIL" +
" [drop LAMBERT_X, LAMBERT_Y]\n" +
" [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year];");
engine.eval("BPE_DETAIL_CLEAN := BPE_DETAIL_VTL[drop LAMBERT_X, LAMBERT_Y]\n" +
"[rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year];");

Dataset bpeDetailClean = (Dataset) engine.getContext().getAttribute("BPE_DETAIL_CLEAN");
Structured.DataStructure bpeDetailCleanStructure = bpeDetailClean.getDataStructure();
Expand All @@ -90,8 +89,8 @@ public void bpeV1() throws ScriptException {
assertThat(bpeDetailCleanStructure.get("year").getRole()).isEqualTo(Dataset.Role.ATTRIBUTE);

// Step 3
engine.eval("BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN" +
" [aggr nb := count(id) group by municipality, year, facility_type];");
engine.eval("BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN[aggr nb := count(id) group by municipality, year, facility_type]" +
"[rename year to TIME_PERIOD];");

Dataset bpeMunicipality = (Dataset) engine.getContext().getAttribute("BPE_MUNICIPALITY");
Structured.DataStructure bpeMunicipalityStructure = bpeMunicipality.getDataStructure();
Expand All @@ -102,17 +101,16 @@ public void bpeV1() throws ScriptException {
assertThat(bpeMunicipalityStructure.get("facility_type").getType()).isEqualTo(String.class);
assertThat(bpeMunicipalityStructure.get("facility_type").getRole()).isEqualTo(Dataset.Role.IDENTIFIER);

assertThat(bpeMunicipalityStructure.get("year").getType()).isEqualTo(String.class);
assertThat(bpeMunicipalityStructure.get("year").getRole()).isEqualTo(Dataset.Role.IDENTIFIER);
assertThat(bpeMunicipalityStructure.get("TIME_PERIOD").getType()).isEqualTo(String.class);
assertThat(bpeMunicipalityStructure.get("TIME_PERIOD").getRole()).isEqualTo(Dataset.Role.IDENTIFIER);


assertThat(bpeMunicipalityStructure.get("nb").getType()).isEqualTo(Long.class);
assertThat(bpeMunicipalityStructure.get("nb").getRole()).isEqualTo(Dataset.Role.MEASURE);

// Step 4
engine.eval("BPE_NUTS3 <- BPE_MUNICIPALITY" +
" [calc nuts3 := if substr(municipality,1,2) = \"97\" then substr(municipality,1,3) else substr(municipality,1,2)] \n" +
" [aggr nb := count(nb) group by year, nuts3, facility_type];");
engine.eval("BPE_NUTS3 <- BPE_MUNICIPALITY[calc nuts3 := if substr(municipality,1,2) = \"97\" then substr(municipality,1,3) else substr(municipality,1,2)]\n" +
"[aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type];");

Dataset bpeNuts = (Dataset) engine.getContext().getAttribute("BPE_NUTS3");
Structured.DataStructure bpeNutsStructure = bpeNuts.getDataStructure();
Expand All @@ -123,8 +121,8 @@ public void bpeV1() throws ScriptException {
assertThat(bpeNutsStructure.get("facility_type").getType()).isEqualTo(String.class);
assertThat(bpeNutsStructure.get("facility_type").getRole()).isEqualTo(Dataset.Role.IDENTIFIER);

assertThat(bpeNutsStructure.get("year").getType()).isEqualTo(String.class);
assertThat(bpeNutsStructure.get("year").getRole()).isEqualTo(Dataset.Role.IDENTIFIER);
assertThat(bpeNutsStructure.get("TIME_PERIOD").getType()).isEqualTo(String.class);
assertThat(bpeNutsStructure.get("TIME_PERIOD").getRole()).isEqualTo(Dataset.Role.IDENTIFIER);


assertThat(bpeNutsStructure.get("nb").getType()).isEqualTo(Long.class);
Expand All @@ -146,7 +144,7 @@ public void bpeV1() throws ScriptException {
// Step 6
Structured.DataStructure censusStructure = TrevasSDMXUtils.buildStructureFromSDMX3("src/test/resources/DSD_BPE_CENSUS.xml", "LEGAL_POP");

SparkDataset censusNuts = new SparkDataset(
SparkDataset legalPop = new SparkDataset(
spark.read()
.option("header", "true")
.option("delimiter", ";")
Expand All @@ -155,13 +153,12 @@ public void bpeV1() throws ScriptException {
censusStructure
);

context.setAttribute("CENSUS_NUTS3_2021", censusNuts, ScriptContext.ENGINE_SCOPE);
context.setAttribute("LEGAL_POP", legalPop, ScriptContext.ENGINE_SCOPE);

engine.eval("CENSUS_NUTS3_2021 := CENSUS_NUTS3_2021 \n" +
" [rename REF_AREA to nuts3, TIME_PERIOD to year, POP_TOT to pop]\n" +
" [filter year = \"2021\"]\n" +
" [calc pop := cast(pop, integer)]" +
" [drop year, NB_COM, POP_MUNI];");
engine.eval("CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop]\n" +
"[filter TIME_PERIOD = \"2021\"]\n" +
"[calc pop := cast(pop, integer)]\n" +
"[drop TIME_PERIOD, NB_COM, POP_MUNI];");

Dataset censusNuts2021 = (Dataset) engine.getContext().getAttribute("CENSUS_NUTS3_2021");
Structured.DataStructure censusNuts2021Structure = censusNuts2021.getDataStructure();
Expand All @@ -173,9 +170,8 @@ public void bpeV1() throws ScriptException {
assertThat(censusNuts2021Structure.get("pop").getRole()).isEqualTo(Dataset.Role.MEASURE);

// Step 7
engine.eval("GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3" +
" [filter facility_type = \"D201\" and year = \"2021\"]\n" +
" [drop facility_type, year];");
engine.eval("GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3[filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"]\n" +
"[drop facility_type, TIME_PERIOD];");

Dataset generalNuts = (Dataset) engine.getContext().getAttribute("GENERAL_PRACT_NUTS3_2021");
Structured.DataStructure generalNutsStructure = generalNuts.getDataStructure();
Expand All @@ -188,8 +184,8 @@ public void bpeV1() throws ScriptException {

// Step 8
engine.eval("BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021)\n" +
" [calc pract_per_10000_inhabitants := nb / pop * 10000]\n" +
" [drop nb, pop];");
"[calc pract_per_10000_inhabitants := nb / pop * 10000]\n" +
"[drop nb, pop];");

Dataset bpeCensus = (Dataset) engine.getContext().getAttribute("BPE_CENSUS_NUTS3_2021");
Structured.DataStructure bpeCensusStructure = bpeCensus.getDataStructure();
Expand Down
38 changes: 17 additions & 21 deletions vtl-sdmx/src/test/java/fr/insee/vtl/SDMXVTLWorkflowTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,15 @@ public class SDMXVTLWorkflowTest {

@BeforeEach
public void setUp() {
SparkSession.builder()
.appName("test")
.master("local")
.getOrCreate();

ScriptEngineManager mgr = new ScriptEngineManager();
engine = mgr.getEngineByExtension("vtl");

engine.put(VtlScriptEngine.PROCESSING_ENGINE_NAMES, "spark");
}

@Disabled
Expand All @@ -54,16 +60,6 @@ void testRefFromRepo() {

@Test
void testGetEmptyDataset() {

SparkSession.builder()
.appName("test")
.master("local")
.getOrCreate();

ScriptEngineManager mgr = new ScriptEngineManager();
ScriptEngine engine = mgr.getEngineByExtension("vtl");
engine.put(VtlScriptEngine.PROCESSING_ENGINE_NAMES, "spark");

ReadableDataLocation rdl = new ReadableDataLocationTmp("src/test/resources/DSD_BPE_CENSUS.xml");
SDMXVTLWorkflow sdmxVtlWorkflow = new SDMXVTLWorkflow(engine, rdl, Java8Helpers.mapOf());
Map<String, Dataset> emptyDatasets = sdmxVtlWorkflow.getEmptyDatasets();
Expand All @@ -84,13 +80,13 @@ void testGetEmptyDataset() {
new Structured.DataStructure(Java8Helpers.listOf(
new Structured.Component("facility_type", String.class, Dataset.Role.IDENTIFIER),
new Structured.Component("municipality", String.class, Dataset.Role.IDENTIFIER),
new Structured.Component("year", String.class, Dataset.Role.IDENTIFIER),
new Structured.Component("TIME_PERIOD", String.class, Dataset.Role.IDENTIFIER),
new Structured.Component("nb", Long.class, Dataset.Role.MEASURE)
))
);
assertThat(result.get("BPE_NUTS3").getDataStructure()).isEqualTo(
new Structured.DataStructure(Java8Helpers.listOf(
new Structured.Component("year", String.class, Dataset.Role.IDENTIFIER),
new Structured.Component("TIME_PERIOD", String.class, Dataset.Role.IDENTIFIER),
new Structured.Component("facility_type", String.class, Dataset.Role.IDENTIFIER),
new Structured.Component("nuts3", String.class, Dataset.Role.IDENTIFIER),
new Structured.Component("nb", Long.class, Dataset.Role.MEASURE)
Expand All @@ -103,8 +99,8 @@ public void testGetRulesetsVTL() {
ReadableDataLocation rdl = new ReadableDataLocationTmp("src/test/resources/DSD_BPE_CENSUS.xml");
SDMXVTLWorkflow sdmxVtlWorkflow = new SDMXVTLWorkflow(engine, rdl, Java8Helpers.mapOf());
assertThat(sdmxVtlWorkflow.getRulesetsVTL()).isEqualTo(
"define datapoint ruleset UNIQUE_MUNICIPALITY (valuedomain CL_DEPCOM) is\n" +
" MUNICIPALITY_FORMAT_RULE : match_characters(CL_DEPCOM, \"[0-9]{5}|2[A-B][0-9]{3}\") errorcode \"Municipality code is not in the correct format\"\n" +
"define datapoint ruleset UNIQUE_MUNICIPALITY (variable DEPCOM) is\n" +
" MUNICIPALITY_FORMAT_RULE : match_characters(DEPCOM, \"[0-9]{5}|2[A-B][0-9]{3}\") errorcode \"Municipality code is not in the correct format\"\n" +
" end datapoint ruleset;\n" +
"\n" +
"define datapoint ruleset NUTS3_TYPES (variable facility_type, nb) is\n" +
Expand All @@ -126,24 +122,24 @@ public void testGetTransformationsVTL() {
" [rename ID_EQUIPEMENT to id, TYPEQU to facility_type, DEPCOM to municipality, REF_YEAR to year];\n" +
"\n" +
"// BPE aggregation by municipality, type and year\n" +
"BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type];\n" +
"BPE_MUNICIPALITY <- BPE_DETAIL_CLEAN [aggr nb := count(id) group by municipality, year, facility_type] [rename year to TIME_PERIOD];\n" +
"\n" +
"// BPE aggregation by NUTS 3, type and year\n" +
"BPE_NUTS3 <- BPE_MUNICIPALITY [calc nuts3 := if substr(municipality,1,2) = \"97\" then substr(municipality,1,3) else substr(municipality,1,2)]\n" +
" [aggr nb := count(nb) group by year, nuts3, facility_type];\n" +
" [aggr nb := count(nb) group by TIME_PERIOD, nuts3, facility_type];\n" +
"\n" +
"// BPE validation of facility types by NUTS 3\n" +
"CHECK_NUTS3_TYPES := check_datapoint(BPE_NUTS3, NUTS3_TYPES invalid);\n" +
"\n" +
"// Prepare 2021 census dataset by NUTS 3\n" +
"CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, TIME_PERIOD to year, POP_TOT to pop]\n" +
" [filter year = \"2021\"]\n" +
"CENSUS_NUTS3_2021 := LEGAL_POP [rename REF_AREA to nuts3, POP_TOT to pop]\n" +
" [filter TIME_PERIOD = \"2021\"]\n" +
" [calc pop := cast(pop, integer)]\n" +
" [drop year, NB_COM, POP_MUNI];\n" +
" [drop TIME_PERIOD, NB_COM, POP_MUNI];\n" +
"\n" +
"// Extract dataset on general practitioners from BPE by NUTS 3 in 2021\n" +
"GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = \"D201\" and year = \"2021\"]\n" +
" [drop facility_type, year];\n" +
"GENERAL_PRACT_NUTS3_2021 := BPE_NUTS3 [filter facility_type = \"D201\" and TIME_PERIOD = \"2021\"]\n" +
" [drop facility_type, TIME_PERIOD];\n" +
"\n" +
"// Merge practitioners and legal population datasets by NUTS 3 in 2021 and compute an indicator\n" +
"BPE_CENSUS_NUTS3_2021 <- inner_join(GENERAL_PRACT_NUTS3_2021, CENSUS_NUTS3_2021)\n" +
Expand Down
Loading

0 comments on commit 7344c7d

Please sign in to comment.