Merge branch 'release/1.4.0' into master
FranckCo committed Jun 8, 2024
2 parents 68739be + 9f483a6 commit 419ced7
Showing 130 changed files with 35,603 additions and 1,987 deletions.
47 changes: 46 additions & 1 deletion .github/workflows/ci.yml
@@ -23,6 +23,15 @@ jobs:
         with:
           java-version: 17
           distribution: "adopt"
+      - uses: s4u/[email protected]
+        with:
+          githubServer: false
+          servers: |
+            [{
+              "id": "Github",
+              "username": "${{ secrets.GH_PACKAGES_USERNAME }}",
+              "password": "${{ secrets.GH_PACKAGES_PASSWORD }}"
+            }]
       - name: Test
         run: mvn test
   package:
@@ -39,6 +48,15 @@ jobs:
         with:
           java-version: 17
           distribution: "adopt"
+      - uses: s4u/[email protected]
+        with:
+          githubServer: false
+          servers: |
+            [{
+              "id": "Github",
+              "username": "${{ secrets.GH_PACKAGES_USERNAME }}",
+              "password": "${{ secrets.GH_PACKAGES_PASSWORD }}"
+            }]
       - name: Test
         run: mvn package
   test-sonar-package:
@@ -59,6 +77,15 @@ jobs:
         with:
           java-version: 17
           distribution: "adopt"
+      - uses: s4u/[email protected]
+        with:
+          githubServer: false
+          servers: |
+            [{
+              "id": "Github",
+              "username": "${{ secrets.GH_PACKAGES_USERNAME }}",
+              "password": "${{ secrets.GH_PACKAGES_PASSWORD }}"
+            }]
       - name: Test, package and analyze with maven & SonarCloud
         run: mvn verify sonar:sonar -Pcoverage -Dsonar.projectKey=InseeFr_Trevas -Dsonar.organization=inseefr -Dsonar.host.url=https://sonarcloud.io
         env:
@@ -82,6 +109,15 @@ jobs:
           server-password: MAVEN_PASSWORD
           gpg-private-key: ${{ secrets.MAVEN_GPG_PRIVATE_KEY }}
           gpg-passphrase: MAVEN_GPG_PASSPHRASE
+      - uses: s4u/[email protected]
+        with:
+          githubServer: false
+          servers: |
+            [{
+              "id": "Github",
+              "username": "${{ secrets.GH_PACKAGES_USERNAME }}",
+              "password": "${{ secrets.GH_PACKAGES_PASSWORD }}"
+            }]
       - name: Publish Trevas
         run: mvn -B -Prelease deploy
         env:
@@ -106,8 +142,17 @@ jobs:
         with:
           java-version: 17
           distribution: "adopt"
+      - uses: s4u/[email protected]
+        with:
+          githubServer: false
+          servers: |
+            [{
+              "id": "Github",
+              "username": "${{ secrets.GH_PACKAGES_USERNAME }}",
+              "password": "${{ secrets.GH_PACKAGES_PASSWORD }}"
+            }]
       - name: Publish package
         run: mvn --batch-mode deploy
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          MAVEN_OPTS: -Dorg.slf4j.simpleLogger.defaultLogLevel='warn'
+          MAVEN_OPTS: -Dorg.slf4j.simpleLogger.defaultLogLevel='warn'
14 changes: 7 additions & 7 deletions coverage/pom.xml
@@ -6,7 +6,7 @@
     <parent>
         <groupId>fr.insee.trevas</groupId>
         <artifactId>trevas-parent</artifactId>
-        <version>1.3.1</version>
+        <version>1.4.0-SNAPSHOT</version>
     </parent>

     <artifactId>coverage</artifactId>
@@ -22,32 +22,32 @@
         <dependency>
             <groupId>fr.insee.trevas</groupId>
             <artifactId>vtl-engine</artifactId>
-            <version>1.3.1</version>
+            <version>1.4.0-SNAPSHOT</version>
         </dependency>
         <dependency>
             <groupId>fr.insee.trevas</groupId>
             <artifactId>vtl-jackson</artifactId>
-            <version>1.3.1</version>
+            <version>1.4.0-SNAPSHOT</version>
         </dependency>
         <dependency>
             <groupId>fr.insee.trevas</groupId>
             <artifactId>vtl-jdbc</artifactId>
-            <version>1.3.1</version>
+            <version>1.4.0-SNAPSHOT</version>
         </dependency>
         <dependency>
            <groupId>fr.insee.trevas</groupId>
            <artifactId>vtl-model</artifactId>
-            <version>1.3.1</version>
+            <version>1.4.0-SNAPSHOT</version>
         </dependency>
         <dependency>
             <groupId>fr.insee.trevas</groupId>
             <artifactId>vtl-parser</artifactId>
-            <version>1.3.1</version>
+            <version>1.4.0-SNAPSHOT</version>
         </dependency>
         <dependency>
             <groupId>fr.insee.trevas</groupId>
             <artifactId>vtl-spark</artifactId>
-            <version>1.3.1</version>
+            <version>1.4.0-SNAPSHOT</version>
         </dependency>
     </dependencies>
     <build>
103 changes: 103 additions & 0 deletions docs/blog/2024-06-07-temporal-operators.mdx
@@ -0,0 +1,103 @@
---
slug: /trevas-temporal-operators
title: Trevas - Temporal operators
authors: [hadrien]
tags: [Trevas]
---

### Temporal operators in Trevas

Version 1.4.0 of Trevas introduces preliminary support for date and time types
and operators.

The specification describes temporal types such as `date`, `time_period`, `time`, and `duration`. However, the Trevas
authors find these descriptions unsatisfactory. This blog post outlines our implementation choices and how they differ from the spec.

In the specification, `time_period` (and the type `date`) is described as a compound type with a start and an end (or a
start and a duration). This complicates the implementation and brings little value to the language, since one can simply
operate on a combination of dates, or on a date and a duration, directly. For this reason, we defined an algebra between the
temporal types and have not yet implemented `time_period`.

| result (operators) | date | duration | number |
| ------------------ | ----------- | --------------- | ------------- |
| **date** | n/a | date (+, -) | n/a |
| **duration** | date (+, -) | duration (+, -) | duration (\*) |
| **number** | n/a | duration (\*) | n/a |

The `period_indicator` function relies on period-awareness of the types, which is not yet defined precisely enough for
it to be implemented.

#### Java mapping

The VTL type `date` is represented internally by the
types [`java.time.Instant`](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/time/Instant.html),
[`java.time.ZonedDateTime`](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/time/ZonedDateTime.html)
and [`java.time.OffsetDateTime`](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/time/OffsetDateTime.html).

`Instant` represents a specific moment in time. Note that this type does not include time-zone information and is therefore
not usable with all the operators. One can use the types `ZonedDateTime` and `OffsetDateTime` when time-zone or
daylight-saving support is required.

The VTL type `duration` is represented internally as the
type [`org.threeten.extra.PeriodDuration`](https://www.threeten.org/threeten-extra/apidocs/org.threeten.extra/org/threeten/extra/PeriodDuration.html)
from the [ThreeTen-Extra](https://www.threeten.org/threeten-extra/) package.
It represents a duration using both calendar units (years, months, days) and a temporal amount (hours, minutes, seconds
and nanoseconds).
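
As an illustration, here is a minimal sketch of how the algebra above maps onto these Java types (plain `java.time` and ThreeTen-Extra, no Trevas API; the class and variable names are ours):

```java
import java.time.Duration;
import java.time.Period;
import java.time.ZonedDateTime;
import org.threeten.extra.PeriodDuration;

public class TemporalAlgebraSketch {
    public static void main(String[] args) {
        // a duration with a calendar part (1 month) and a time part (2 hours)
        PeriodDuration dur = PeriodDuration.of(Period.ofMonths(1), Duration.ofHours(2));

        // date + duration -> date
        ZonedDateTime date = ZonedDateTime.parse("2024-06-07T10:00:00+02:00[Europe/Paris]");
        ZonedDateTime later = date.plus(dur);       // 2024-07-07T12:00+02:00[Europe/Paris]

        // duration * number -> duration
        PeriodDuration twice = dur.multipliedBy(2); // P2MT4H

        System.out.println(later + " " + twice);
    }
}
```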

#### Function `flow_to_stock`

The `flow_to_stock` function converts a data set with flow interpretation into a stock interpretation. This transformation
is useful when you want to aggregate flow data (e.g., sales or production rates) into cumulative stock data (e.g., total
inventory).

**Syntax:**

`result := flow_to_stock(op)`

**Parameters:**

- `op` - The input data set with flow interpretation. The data set must have an identifier of type `time`, additional
identifiers, and at least one measure of type `number`.

**Result:**

The function returns a data set with the same structure as the input, but with the values converted to stock
interpretation, i.e. accumulated over time (for instance, flows of 2, 3 and 5 over three consecutive periods become stocks of 2, 5 and 10).

#### Function `stock_to_flow`

The `stock_to_flow` function converts a data set with stock interpretation into a flow interpretation. This
transformation is useful when you want to derive flow data from cumulative stock data.

**Syntax:**

`result := stock_to_flow(op)`

**Parameters:**

- `op` - The input data set with stock interpretation. The data set must have an identifier of type `time`, additional
identifiers, and at least one measure of type `number`.

**Result:**

The function returns a data set with the same structure as the input, but with the values converted to flow
interpretation, i.e. differenced period to period (stocks of 2, 5 and 10 become flows of 2, 3 and 5).

#### Function `timeshift`

The `timeshift` function shifts the time identifiers in the data set by a given number of periods. This is useful for
analyzing data at different time offsets, such as comparing current values to past values.

**Syntax:**

`result := timeshift(op, shiftNumber)`

**Parameters:**

- `op` - The operand data set containing time series.
- `shiftNumber` - An integer representing the number of periods to shift. Positive values shift forward in time, while
negative values shift backward.

**Result:**

The function returns a data set with the time identifiers shifted by the specified number of periods.
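
A minimal sketch of running these operators through the Trevas engine (assuming a Trevas dataset `ds1`, with a `time` identifier and a numeric measure, has already been built and bound; the result names are ours):

```java
import javax.script.ScriptContext;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;

ScriptEngineManager mgr = new ScriptEngineManager();
ScriptEngine engine = mgr.getEngineByExtension("vtl");
engine.getBindings(ScriptContext.ENGINE_SCOPE).put("ds1", ds1);

// cumulate, difference and shift the series of ds1
engine.eval("stock := flow_to_stock(ds1);");
engine.eval("flow := stock_to_flow(stock);");
engine.eval("previous := timeshift(ds1, -1);");
```
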
155 changes: 155 additions & 0 deletions docs/blog/2024-06-07-trevas-sdmx.mdx
@@ -0,0 +1,155 @@
---
slug: /trevas-sdmx
title: Trevas - SDMX
authors: [nicolas]
tags: [Trevas, SDMX]
---

import useBaseUrl from '@docusaurus/useBaseUrl';
import ThemedImage from '@theme/ThemedImage';

### News

Trevas 1.4.0 introduces the VTL SDMX module.

This module makes it possible to consume SDMX metadata sources to instantiate Trevas DataStructures and Datasets.

It also allows executing VTL TransformationSchemes to obtain the resulting persistent datasets.

#### Overview

<div style={{ textAlign: 'center' }}>
<ThemedImage
alt="VTL SDMX Diagram"
sources={{
light: useBaseUrl('/img/vtl-sdmx-light.svg'),
dark: useBaseUrl('/img/vtl-sdmx-dark.svg'),
}}
/>
</div>

Trevas supports the SDMX message elements shown above. Only the `VtlMappingSchemes` element is optional.

The elements in box 1 are used to produce Trevas DataStructures, filling the VTL component attributes: name, role, type, nullable and valuedomain.

The elements in box 2 are used to generate the VTL code (rulesets & transformations).

#### Tools available

#### `buildStructureFromSDMX3` utility

`TrevasSDMXUtils.buildStructureFromSDMX3` allows you to obtain a Trevas DataStructure.

Providing corresponding data, you can then build a Trevas Dataset.

```java
Structured.DataStructure structure = TrevasSDMXUtils.buildStructureFromSDMX3("path/sdmx_file.xml", "STRUCT_ID");

SparkDataset ds = new SparkDataset(
        spark.read()
                .option("header", "true")
                .option("delimiter", ";")
                .option("quote", "\"")
                .csv("path"),
        structure
);
```

#### `SDMXVTLWorkflow` object

The `SDMXVTLWorkflow` constructor takes three arguments:

- a `ScriptEngine` (Trevas or another)
- a `ReadableDataLocation` to handle an SDMX message
- a map of names / Datasets

```java
SparkSession spark = SparkSession.builder()
        .appName("test")
        .master("local")
        .getOrCreate();

ScriptEngineManager mgr = new ScriptEngineManager();
ScriptEngine engine = mgr.getEngineByExtension("vtl");
engine.put(VtlScriptEngine.PROCESSING_ENGINE_NAMES, "spark");

ReadableDataLocation rdl = new ReadableDataLocationTmp("src/test/resources/DSD_BPE_CENSUS.xml");

SDMXVTLWorkflow sdmxVtlWorkflow = new SDMXVTLWorkflow(engine, rdl, Map.of());
```

This object then provides the following three functions.

#### SDMXVTLWorkflow `run` function - Preview mode

The `run` function can easily be called in preview mode, without attached data.

```java
ScriptEngineManager mgr = new ScriptEngineManager();
ScriptEngine engine = mgr.getEngineByExtension("vtl");
engine.put(VtlScriptEngine.PROCESSING_ENGINE_NAMES, "spark");

ReadableDataLocation rdl = new ReadableDataLocationTmp("src/test/resources/DSD_BPE_CENSUS.xml");

SDMXVTLWorkflow sdmxVtlWorkflow = new SDMXVTLWorkflow(engine, rdl, Map.of());

// Instead of using TrevasSDMXUtils.buildStructureFromSDMX3 and data sources
// to build Trevas Datasets, sdmxVtlWorkflow.getEmptyDatasets()
// handles the SDMX message structures to produce Trevas Datasets
// with the metadata defined in the message, attached to empty data
Map<String, Dataset> emptyDatasets = sdmxVtlWorkflow.getEmptyDatasets();
engine.getBindings(ScriptContext.ENGINE_SCOPE).putAll(emptyDatasets);

Map<String, PersistentDataset> result = sdmxVtlWorkflow.run();
```

The preview mode allows you to check the conformity of the SDMX file and to inspect the metadata of the output datasets.

#### SDMXVTLWorkflow `run` function

Once an `SDMXVTLWorkflow` is built, it is easy to run the VTL validations and transformations defined in the SDMX file.

```java
Structured.DataStructure structure = TrevasSDMXUtils.buildStructureFromSDMX3("path/sdmx_file.xml", "ds1");

SparkDataset ds1 = new SparkDataset(
        spark.read()
                .option("header", "true")
                .option("delimiter", ";")
                .option("quote", "\"")
                .csv("path/data.csv"),
        structure
);

ScriptEngineManager mgr = new ScriptEngineManager();
ScriptEngine engine = mgr.getEngineByExtension("vtl");
engine.put(VtlScriptEngine.PROCESSING_ENGINE_NAMES, "spark");

Map<String, Dataset> inputs = Map.of("ds1", ds1);

ReadableDataLocation rdl = new ReadableDataLocationTmp("path/sdmx_file.xml");

SDMXVTLWorkflow sdmxVtlWorkflow = new SDMXVTLWorkflow(engine, rdl, inputs);

Map<String, PersistentDataset> bindings = sdmxVtlWorkflow.run();
```

As a result, one will receive all the datasets defined as persistent in the `TransformationSchemes` definition.
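
For example, a minimal sketch of consuming the result (the persistent dataset name `ds2` is hypothetical, standing in for whatever the SDMX file defines):

```java
// list the persistent results, then pick one by name ("ds2" is hypothetical)
bindings.keySet().forEach(System.out::println);
PersistentDataset ds2 = bindings.get("ds2");
```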

#### SDMXVTLWorkflow `getTransformationsVTL` function

Gets the VTL code corresponding to the SDMX TransformationSchemes definition.

```java
SDMXVTLWorkflow sdmxVtlWorkflow = new SDMXVTLWorkflow(engine, rdl, Map.of());
String vtl = sdmxVtlWorkflow.getTransformationsVTL();
```

#### SDMXVTLWorkflow `getRulesetsVTL` function

Gets the VTL code corresponding to the SDMX ruleset schemes definition.

```java
SDMXVTLWorkflow sdmxVtlWorkflow = new SDMXVTLWorkflow(engine, rdl, Map.of());
String dprs = sdmxVtlWorkflow.getRulesetsVTL();
```