diff --git a/README.md b/README.md index b61c963..09b14fa 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ # ExeKGLib -# ExeKGLib ![PyPI](https://img.shields.io/pypi/v/exe-kg-lib) ![Python](https://img.shields.io/badge/python-v3.8+-blue.svg) @@ -29,7 +28,7 @@ _Klironomos A., Zhou B., Tan Z., Zheng Z., Gad-Elrab M., Paulheim H., Kharlamov Detailed information (installation, documentation etc.) about **ExeKGLib** can be found in [its website](https://boschresearch.github.io/ExeKGLib/) and basic information is shown below. -To download, run `pip install exe-kg-lib`. +## Installation [//]: # (--8<-- [start:installation]) To install, run `pip install exe-kg-lib`. @@ -43,24 +42,24 @@ For detailed installation instructions, refer to the [installation page](https:/
Click to expand -[//]: # (--8<-- [start:supportedmethods]) + | KG schema (abbreviation) | Task | Method | Properties | Input (data structure) | Output (data structure) | Implemented by Python class | | ------------------------ | ------------------------- | ---------------------------- | --------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------- | -| Machine Learning (ML) | Train | KNNTrain | \- | DataInTrainX (Matrix or Vector)
DataInTrainY (Matrix or Vector) | DataOutPredictedValueTrain (Matrix or Vector)
DataOutTrainModel (SingleValue) | TrainKNNTrain | -| Machine Learning (ML) | Train | MLPTrain | \- | DataInTrainX (Matrix or Vector)
DataInTrainY (Matrix or Vector) | DataOutPredictedValueTrain (Matrix or Vector)
DataOutTrainModel (SingleValue) | TrainMLPTrain | -| Machine Learning (ML) | Train | LRTrain | \- | DataInTrainX (Matrix or Vector)
DataInTrainY (Matrix or Vector) | DataOutPredictedValueTrain (Matrix or Vector)
DataOutTrainModel (SingleValue) | TrainLRTrain | -| Machine Learning (ML) | Test | KNNTest | \- | DataInTestModel (SingleValue)
DataInTestX (Matrix or Vector) | DataOutPredictedValueTest (Matrix or Vector) | TestKNNTest | -| Machine Learning (ML) | Test | MLPTest | \- | DataInTestModel (SingleValue)
DataInTestX (Matrix or Vector) | DataOutPredictedValueTest (Matrix or Vector) | TestMLPTest | -| Machine Learning (ML) | Test | LRTest | \- | DataInTestModel (SingleValue)
DataInTestX (Matrix or Vector) | DataOutPredictedValueTest (Matrix or Vector) | TestLRTest | -| Machine Learning (ML) | PerformanceCalculation | PerformanceCalculationMethod | \- | DataInTrainRealY (Matrix or Vector)
DataInTrainPredictedY (Matrix or Vector)
DataInTestPredictedY (Matrix or Vector)
DataInTestRealY (Matrix or Vector) | DataOutMLTestErr (Vector)
DataOutMLTrainErr (Vector) | PerformanceCalculationPerformanceCalculationMethod | -| Machine Learning (ML) | Concatenation | ConcatenationMethod | \- | DataInConcatenation (list of Vector) | DataOutConcatenatedData (Matrix) | ConcatenationConcatenationMethod | -| Machine Learning (ML) | DataSplitting | DataSplittingMethod | \- | DataInDataSplittingX (Matrix or Vector)
DataInDataSplittingY (Matrix or Vector) | DataOutSplittedTestDataX (Matrix or Vector)
DataOutSplittedTrainDataY (Matrix or Vector)
DataOutSplittedTrainDataX (Matrix or Vector)
DataOutSplittedTestDataY (Matrix or Vector) | DataSplittingDataSplittingMethod | -| Visualization (Visu) | CanvasTask | CanvasMethod | hasCanvasName (string)
hasLayout (string) | \- | \- | CanvasTaskCanvasMethod | -| Visualization (Visu) | PlotTask | LineplotMethod | hasLineStyle (string)
hasLineWidth (int)
hasLegendName (string) | DataInVector (Vector) | \- | PlotTaskLineplotMethod | -| Visualization (Visu) | PlotTask | ScatterplotMethod | hasLineStyle (string)
hasLineWidth (int)
hasScatterSize (int)
hasLegendName (string) | DataInVector (Vector) | \- | PlotTaskScatterplotMethod | -| Statistics (Stats) | TrendCalculationTask | TrendCalculationMethod | \- | DataInTrendCalculation (Vector) | DataOutTrendCalculation (Vector) | TrendCalculationTaskTrendCalculationMethod | -| Statistics (Stats) | NormalizationTask | NormalizationMethod | \- | DataInNormalization (Vector) | DataOutNormalization (Vector) | NormalizationTaskNormalizationMethod | -| Statistics (Stats) | ScatteringCalculationTask | ScatteringCalculationMethod | \- | DataInScatteringCalculation (Vector) | DataOutScatteringCalculation (Vector) | ScatteringCalculationTaskScatteringCalculationMethod | +| Machine Learning (ml) | Train | KNNTrain | \- | DataInTrainX (Matrix or Vector)
DataInTrainY (Matrix or Vector) | DataOutPredictedValueTrain (Matrix or Vector)
DataOutTrainModel (SingleValue) | TrainKNNTrain | +| Machine Learning (ml) | Train | MLPTrain | \- | DataInTrainX (Matrix or Vector)
DataInTrainY (Matrix or Vector) | DataOutPredictedValueTrain (Matrix or Vector)
DataOutTrainModel (SingleValue) | TrainMLPTrain | +| Machine Learning (ml) | Train | LRTrain | \- | DataInTrainX (Matrix or Vector)
DataInTrainY (Matrix or Vector) | DataOutPredictedValueTrain (Matrix or Vector)
DataOutTrainModel (SingleValue) | TrainLRTrain | +| Machine Learning (ml) | Test | KNNTest | \- | DataInTestModel (SingleValue)
DataInTestX (Matrix or Vector) | DataOutPredictedValueTest (Matrix or Vector) | TestKNNTest | +| Machine Learning (ml) | Test | MLPTest | \- | DataInTestModel (SingleValue)
DataInTestX (Matrix or Vector) | DataOutPredictedValueTest (Matrix or Vector) | TestMLPTest | +| Machine Learning (ml) | Test | LRTest | \- | DataInTestModel (SingleValue)
DataInTestX (Matrix or Vector) | DataOutPredictedValueTest (Matrix or Vector) | TestLRTest | +| Machine Learning (ml) | PerformanceCalculation | PerformanceCalculationMethod | \- | DataInTrainRealY (Matrix or Vector)
DataInTrainPredictedY (Matrix or Vector)
DataInTestPredictedY (Matrix or Vector)
DataInTestRealY (Matrix or Vector) | DataOutMLTestErr (Vector)
DataOutMLTrainErr (Vector) | PerformanceCalculationPerformanceCalculationMethod | +| Machine Learning (ml) | Concatenation | ConcatenationMethod | \- | DataInConcatenation (list of Vector) | DataOutConcatenatedData (Matrix) | ConcatenationConcatenationMethod | +| Machine Learning (ml) | DataSplitting | DataSplittingMethod | \- | DataInDataSplittingX (Matrix or Vector)
DataInDataSplittingY (Matrix or Vector) | DataOutSplittedTestDataX (Matrix or Vector)
DataOutSplittedTrainDataY (Matrix or Vector)
DataOutSplittedTrainDataX (Matrix or Vector)
DataOutSplittedTestDataY (Matrix or Vector) | DataSplittingDataSplittingMethod | +| Visualization (visu) | CanvasTask | CanvasMethod | hasCanvasName (string)
hasLayout (string) | \- | \- | CanvasTaskCanvasMethod | +| Visualization (visu) | PlotTask | LineplotMethod | hasLineStyle (string)
hasLineWidth (int)
hasLegendName (string) | DataInVector (Vector) | \- | PlotTaskLineplotMethod | +| Visualization (visu) | PlotTask | ScatterplotMethod | hasLineStyle (string)
hasLineWidth (int)
hasScatterSize (int)
hasLegendName (string) | DataInVector (Vector) | \- | PlotTaskScatterplotMethod | +| Statistics (stats) | TrendCalculationTask | TrendCalculationMethod | \- | DataInTrendCalculation (Vector) | DataOutTrendCalculation (Vector) | TrendCalculationTaskTrendCalculationMethod | +| Statistics (stats) | NormalizationTask | NormalizationMethod | \- | DataInNormalization (Vector) | DataOutNormalization (Vector) | NormalizationTaskNormalizationMethod | +| Statistics (stats) | ScatteringCalculationTask | ScatteringCalculationMethod | \- | DataInScatteringCalculation (Vector) | DataOutScatteringCalculation (Vector) | ScatteringCalculationTaskScatteringCalculationMethod | [//]: # (--8<-- [end:supportedmethods]) diff --git a/docs/installation.md b/docs/installation.md index c4998ba..c5b3017 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -51,7 +51,7 @@ before ranting! :pray: conda deactivate ``` -### Step 1: Dependency Installation +## Step 1: Dependency Installation The installation of the project's dependencies should be piece of :cake: in most cases by running @@ -75,7 +75,7 @@ from within the project directory. | _"I get a `ConnectionError`"_ | Maybe you have proxy issues. | | _"I destroyed my poetry environment"_ | Delete the `.venv` folder and create a new env. | -### Step 2: Pre-commit Git Hooks Installation +## Step 2: Pre-commit Git Hooks Installation To ensure compatibility of each future commit with the project's conventions (e.g. code format), some predefined git hooks should be installed by running the following commands. diff --git a/examples/ml_pipeline_creation.py b/examples/ml_pipeline_creation.py index 6f07fda..d25cff8 100644 --- a/examples/ml_pipeline_creation.py +++ b/examples/ml_pipeline_creation.py @@ -4,7 +4,7 @@ from exe_kg_lib import ExeKG if __name__ == "__main__": - exe_kg = ExeKG(kg_schema_name="Machine Learning") + exe_kg = ExeKG() feature_columns = ["feature_1", "feature_2", "feature_3", "feature_4", "feature_5"] label_column = "label" @@ -33,20 +33,22 @@ ) concatenate_task = exe_kg.add_task( - task_type="Concatenation", + kg_schema_short="ml", + task="Concatenation", input_data_entity_dict={"DataInConcatenation": feature_data_entities}, - method_type="ConcatenationMethod", - data_properties={}, + method="ConcatenationMethod", + properties_dict={}, ) data_splitting_task = exe_kg.add_task( - task_type="DataSplitting", + kg_schema_short="ml", + task="DataSplitting", input_data_entity_dict={ "DataInDataSplittingX": [concatenate_task.output_dict["DataOutConcatenatedData"]], "DataInDataSplittingY": [label_data_entity], }, - method_type="DataSplittingMethod", - data_properties={"hasSplitRatio": 0.8}, + method="DataSplittingMethod", + properties_dict={"hasSplitRatio": 0.8}, ) train_x = data_splitting_task.output_dict["DataOutSplittedTrainDataX"] @@ -55,80 +57,83 @@ test_real_y = data_splitting_task.output_dict["DataOutSplittedTestDataY"] knn_train_task = exe_kg.add_task( - task_type="Train", + kg_schema_short="ml", + task="Train", input_data_entity_dict={ "DataInTrainX": [train_x], "DataInTrainY": [train_real_y], }, - method_type="KNNTrain", - data_properties={}, + method="KNNTrain", + properties_dict={}, ) model = knn_train_task.output_dict["DataOutTrainModel"] train_predicted_y = knn_train_task.output_dict["DataOutPredictedValueTrain"] knn_test_task = exe_kg.add_task( - task_type="Test", + kg_schema_short="ml", + task="Test", input_data_entity_dict={ "DataInTestModel": [model], "DataInTestX": [test_x], }, - method_type="KNNTest", - data_properties={}, + method="KNNTest", + properties_dict={}, ) test_predicted_y = knn_test_task.output_dict["DataOutPredictedValueTest"] performance_calc_task = exe_kg.add_task( - task_type="PerformanceCalculation", + kg_schema_short="ml", + task="PerformanceCalculation", input_data_entity_dict={ "DataInTrainRealY": [train_real_y], "DataInTrainPredictedY": [train_predicted_y], "DataInTestRealY": [test_real_y], "DataInTestPredictedY": [test_predicted_y], }, - method_type="PerformanceCalculationMethod", - data_properties={}, + method="PerformanceCalculationMethod", + properties_dict={}, ) train_error = performance_calc_task.output_dict["DataOutMLTrainErr"] test_error = performance_calc_task.output_dict["DataOutMLTestErr"] canvas_task = exe_kg.add_task( - task_type="CanvasTask", + kg_schema_short="visu", + task="CanvasTask", input_data_entity_dict={}, - method_type="CanvasMethod", - data_properties={"hasCanvasName": "MyCanvas", "hasLayout": "1 1"}, - visualization=True, + method="CanvasMethod", + properties_dict={"hasCanvasName": "MyCanvas", "hasLayout": "1 1"}, ) train_error_lineplot_task = exe_kg.add_task( - task_type="PlotTask", + kg_schema_short="visu", + task="PlotTask", input_data_entity_dict={ "DataInVector": [train_error], }, - method_type="ScatterplotMethod", - data_properties={ + method="ScatterplotMethod", + properties_dict={ "hasLegendName": "Train error", "hasLineStyle": "o", "hasScatterStyle": "o", "hasLineWidth": 1, "hasScatterSize": 1, }, - visualization=True, ) test_error_lineplot_task = exe_kg.add_task( - task_type="PlotTask", + kg_schema_short="visu", + task="PlotTask", input_data_entity_dict={ "DataInVector": [test_error], }, - method_type="ScatterplotMethod", - data_properties={ + method="ScatterplotMethod", + properties_dict={ "hasLegendName": "Test error", "hasLineStyle": "o", "hasScatterStyle": "o", "hasLineWidth": 1, "hasScatterSize": 1, }, - visualization=True, ) exe_kg.save_created_kg(f"./pipelines/{pipeline_name}.ttl") diff --git a/examples/pipelines/MLPipeline.ttl b/examples/pipelines/MLPipeline.ttl index 754488c..1090f27 100644 --- a/examples/pipelines/MLPipeline.ttl +++ b/examples/pipelines/MLPipeline.ttl @@ -3,17 +3,39 @@ @prefix visu: . @prefix xsd: . -ml:MLPipeline a ds:Pipeline ; +ds:MLPipeline a ds:Pipeline ; ds:hasInputDataPath "./examples/data/dummy_data.csv"^^xsd:string ; ds:hasStartTask ml:Concatenation1 . -ml:CanvasMethod1 a visu:CanvasMethod . +ds:feature_1 a ds:DataEntity ; + ds:hasDataSemantics ds:TimeSeries ; + ds:hasDataStructure ds:Vector ; + ds:hasSource "feature_1"^^xsd:string . -ml:CanvasTask1 a visu:CanvasTask ; - ds:hasNextTask ml:PlotTask1 ; - visu:hasCanvasMethod ml:CanvasMethod1 ; - visu:hasCanvasName "MyCanvas"^^xsd:string ; - visu:hasLayout "1 1"^^ds:intPair . +ds:feature_2 a ds:DataEntity ; + ds:hasDataSemantics ds:TimeSeries ; + ds:hasDataStructure ds:Vector ; + ds:hasSource "feature_2"^^xsd:string . + +ds:feature_3 a ds:DataEntity ; + ds:hasDataSemantics ds:TimeSeries ; + ds:hasDataStructure ds:Vector ; + ds:hasSource "feature_3"^^xsd:string . + +ds:feature_4 a ds:DataEntity ; + ds:hasDataSemantics ds:TimeSeries ; + ds:hasDataStructure ds:Vector ; + ds:hasSource "feature_4"^^xsd:string . + +ds:feature_5 a ds:DataEntity ; + ds:hasDataSemantics ds:TimeSeries ; + ds:hasDataStructure ds:Vector ; + ds:hasSource "feature_5"^^xsd:string . + +ds:label a ds:DataEntity ; + ds:hasDataSemantics ds:TimeSeries ; + ds:hasDataStructure ds:Vector ; + ds:hasSource "label"^^xsd:string . ml:Concatenation1 a ml:Concatenation ; ds:hasInput ml:DataInConcatenation1_1, @@ -28,25 +50,25 @@ ml:Concatenation1 a ml:Concatenation ; ml:ConcatenationMethod1 a ml:ConcatenationMethod . ml:DataInConcatenation1_1 a ml:DataInConcatenation ; - ds:hasReference ml:feature_1 . + ds:hasReference ds:feature_1 . ml:DataInConcatenation1_2 a ml:DataInConcatenation ; - ds:hasReference ml:feature_2 . + ds:hasReference ds:feature_2 . ml:DataInConcatenation1_3 a ml:DataInConcatenation ; - ds:hasReference ml:feature_3 . + ds:hasReference ds:feature_3 . ml:DataInConcatenation1_4 a ml:DataInConcatenation ; - ds:hasReference ml:feature_4 . + ds:hasReference ds:feature_4 . ml:DataInConcatenation1_5 a ml:DataInConcatenation ; - ds:hasReference ml:feature_5 . + ds:hasReference ds:feature_5 . ml:DataInDataSplittingX1_1 a ml:DataInDataSplittingX ; ds:hasReference ml:DataOutConcatenatedData1 . ml:DataInDataSplittingY1_1 a ml:DataInDataSplittingY ; - ds:hasReference ml:label . + ds:hasReference ds:label . ml:DataInTestModel1_1 a ml:DataInTestModel ; ds:hasReference ml:DataOutTrainModel1 . @@ -94,36 +116,13 @@ ml:PerformanceCalculation1 a ml:PerformanceCalculation ; ml:DataInTestRealY1_1, ml:DataInTrainPredictedY1_1, ml:DataInTrainRealY1_1 ; - ds:hasNextTask ml:CanvasTask1 ; + ds:hasNextTask visu:CanvasTask1 ; ds:hasOutput ml:DataOutMLTestErr1, ml:DataOutMLTrainErr1 ; ml:hasPerformanceCalculationMethod ml:PerformanceCalculationMethod1 . ml:PerformanceCalculationMethod1 a ml:PerformanceCalculationMethod . -ml:PlotTask1 a visu:PlotTask ; - ds:hasInput visu:DataInVector1_1 ; - ds:hasNextTask ml:PlotTask2 ; - visu:hasLegendName "Train error"^^xsd:string ; - visu:hasLineStyle "o"^^xsd:string ; - visu:hasLineWidth "1"^^xsd:int ; - visu:hasPlotMethod ml:ScatterplotMethod1 ; - visu:hasScatterSize "1"^^xsd:int ; - visu:hasScatterStyle "o"^^xsd:string . - -ml:PlotTask2 a visu:PlotTask ; - ds:hasInput visu:DataInVector2_1 ; - visu:hasLegendName "Test error"^^xsd:string ; - visu:hasLineStyle "o"^^xsd:string ; - visu:hasLineWidth "1"^^xsd:int ; - visu:hasPlotMethod ml:ScatterplotMethod2 ; - visu:hasScatterSize "1"^^xsd:int ; - visu:hasScatterStyle "o"^^xsd:string . - -ml:ScatterplotMethod1 a visu:ScatterplotMethod . - -ml:ScatterplotMethod2 a visu:ScatterplotMethod . - ml:Test1 a ml:Test ; ds:hasInput ml:DataInTestModel1_1, ml:DataInTestX1_1 ; @@ -139,35 +138,13 @@ ml:Train1 a ml:Train ; ml:DataOutTrainModel1 ; ml:hasTrainMethod ml:KNNTrain1 . -ml:feature_1 a ds:DataEntity ; - ds:hasDataSemantics ds:TimeSeries ; - ds:hasDataStructure ds:Vector ; - ds:hasSource "feature_1"^^xsd:string . - -ml:feature_2 a ds:DataEntity ; - ds:hasDataSemantics ds:TimeSeries ; - ds:hasDataStructure ds:Vector ; - ds:hasSource "feature_2"^^xsd:string . - -ml:feature_3 a ds:DataEntity ; - ds:hasDataSemantics ds:TimeSeries ; - ds:hasDataStructure ds:Vector ; - ds:hasSource "feature_3"^^xsd:string . - -ml:feature_4 a ds:DataEntity ; - ds:hasDataSemantics ds:TimeSeries ; - ds:hasDataStructure ds:Vector ; - ds:hasSource "feature_4"^^xsd:string . +visu:CanvasMethod1 a visu:CanvasMethod . -ml:feature_5 a ds:DataEntity ; - ds:hasDataSemantics ds:TimeSeries ; - ds:hasDataStructure ds:Vector ; - ds:hasSource "feature_5"^^xsd:string . - -ml:label a ds:DataEntity ; - ds:hasDataSemantics ds:TimeSeries ; - ds:hasDataStructure ds:Vector ; - ds:hasSource "label"^^xsd:string . +visu:CanvasTask1 a visu:CanvasTask ; + ds:hasNextTask visu:PlotTask1 ; + visu:hasCanvasMethod visu:CanvasMethod1 ; + visu:hasCanvasName "MyCanvas"^^xsd:string ; + visu:hasLayout "1 1"^^ds:intPair . visu:DataInVector1_1 a visu:DataInVector ; ds:hasReference ml:DataOutMLTrainErr1 . @@ -175,6 +152,29 @@ visu:DataInVector1_1 a visu:DataInVector ; visu:DataInVector2_1 a visu:DataInVector ; ds:hasReference ml:DataOutMLTestErr1 . +visu:PlotTask1 a visu:PlotTask ; + ds:hasInput visu:DataInVector1_1 ; + ds:hasNextTask visu:PlotTask2 ; + visu:hasLegendName "Train error"^^xsd:string ; + visu:hasLineStyle "o"^^xsd:string ; + visu:hasLineWidth "1"^^xsd:int ; + visu:hasPlotMethod visu:ScatterplotMethod1 ; + visu:hasScatterSize "1"^^xsd:int ; + visu:hasScatterStyle "o"^^xsd:string . + +visu:PlotTask2 a visu:PlotTask ; + ds:hasInput visu:DataInVector2_1 ; + visu:hasLegendName "Test error"^^xsd:string ; + visu:hasLineStyle "o"^^xsd:string ; + visu:hasLineWidth "1"^^xsd:int ; + visu:hasPlotMethod visu:ScatterplotMethod2 ; + visu:hasScatterSize "1"^^xsd:int ; + visu:hasScatterStyle "o"^^xsd:string . + +visu:ScatterplotMethod1 a visu:ScatterplotMethod . + +visu:ScatterplotMethod2 a visu:ScatterplotMethod . + ml:DataOutConcatenatedData1 a ds:DataEntity . ml:DataOutMLTestErr1 a ds:DataEntity . diff --git a/examples/pipelines/StatsPipeline.ttl b/examples/pipelines/StatsPipeline.ttl index 4fe4e31..10689b1 100644 --- a/examples/pipelines/StatsPipeline.ttl +++ b/examples/pipelines/StatsPipeline.ttl @@ -3,61 +3,61 @@ @prefix visu: . @prefix xsd: . -stats:StatsPipeline a ds:Pipeline ; +ds:StatsPipeline a ds:Pipeline ; ds:hasInputDataPath "./examples/data/dummy_data.csv"^^xsd:string ; ds:hasStartTask stats:NormalizationTask1 . -stats:CanvasMethod1 a visu:CanvasMethod . - -stats:CanvasTask1 a visu:CanvasTask ; - ds:hasNextTask stats:PlotTask1 ; - visu:hasCanvasMethod stats:CanvasMethod1 ; - visu:hasCanvasName "MyCanvas"^^xsd:string ; - visu:hasLayout "1 1"^^ds:intPair . - stats:DataInNormalization1_1 a stats:DataInNormalization ; - ds:hasReference stats:feature_1 . + ds:hasReference ds:feature_1 . stats:NormalizationMethod1 a stats:NormalizationMethod . stats:NormalizationTask1 a stats:NormalizationTask ; ds:hasInput stats:DataInNormalization1_1 ; - ds:hasNextTask stats:CanvasTask1 ; + ds:hasNextTask visu:CanvasTask1 ; ds:hasOutput stats:DataOutNormalization1 ; stats:hasNormalizationMethod stats:NormalizationMethod1 . -stats:PlotTask1 a visu:PlotTask ; +visu:CanvasMethod1 a visu:CanvasMethod . + +visu:CanvasTask1 a visu:CanvasTask ; + ds:hasNextTask visu:PlotTask1 ; + visu:hasCanvasMethod visu:CanvasMethod1 ; + visu:hasCanvasName "MyCanvas"^^xsd:string ; + visu:hasLayout "1 1"^^ds:intPair . + +visu:DataInVector1_1 a visu:DataInVector ; + ds:hasReference ds:feature_1 . + +visu:DataInVector2_1 a visu:DataInVector ; + ds:hasReference stats:DataOutNormalization1 . + +visu:PlotTask1 a visu:PlotTask ; ds:hasInput visu:DataInVector1_1 ; - ds:hasNextTask stats:PlotTask2 ; + ds:hasNextTask visu:PlotTask2 ; visu:hasLegendName "Feature 1 before normalization"^^xsd:string ; visu:hasLineStyle "o"^^xsd:string ; visu:hasLineWidth "1"^^xsd:int ; - visu:hasPlotMethod stats:ScatterplotMethod1 ; + visu:hasPlotMethod visu:ScatterplotMethod1 ; visu:hasScatterSize "1"^^xsd:int ; visu:hasScatterStyle "o"^^xsd:string . -stats:PlotTask2 a visu:PlotTask ; +visu:PlotTask2 a visu:PlotTask ; ds:hasInput visu:DataInVector2_1 ; visu:hasLegendName "Normalized feature 1"^^xsd:string ; visu:hasLineStyle "o"^^xsd:string ; visu:hasLineWidth "1"^^xsd:int ; - visu:hasPlotMethod stats:ScatterplotMethod2 ; + visu:hasPlotMethod visu:ScatterplotMethod2 ; visu:hasScatterSize "1"^^xsd:int ; visu:hasScatterStyle "o"^^xsd:string . -stats:ScatterplotMethod1 a visu:ScatterplotMethod . - -stats:ScatterplotMethod2 a visu:ScatterplotMethod . +visu:ScatterplotMethod1 a visu:ScatterplotMethod . -visu:DataInVector1_1 a visu:DataInVector ; - ds:hasReference stats:feature_1 . +visu:ScatterplotMethod2 a visu:ScatterplotMethod . -visu:DataInVector2_1 a visu:DataInVector ; - ds:hasReference stats:DataOutNormalization1 . - -stats:DataOutNormalization1 a ds:DataEntity . - -stats:feature_1 a ds:DataEntity ; +ds:feature_1 a ds:DataEntity ; ds:hasDataSemantics ds:TimeSeries ; ds:hasDataStructure ds:Vector ; ds:hasSource "feature_1"^^xsd:string . + +stats:DataOutNormalization1 a ds:DataEntity . diff --git a/examples/pipelines/VisuPipeline.ttl b/examples/pipelines/VisuPipeline.ttl index 916f4b7..b897956 100644 --- a/examples/pipelines/VisuPipeline.ttl +++ b/examples/pipelines/VisuPipeline.ttl @@ -2,10 +2,15 @@ @prefix visu: . @prefix xsd: . -visu:VisuPipeline a ds:Pipeline ; +ds:VisuPipeline a ds:Pipeline ; ds:hasInputDataPath "./examples/data/dummy_data.csv"^^xsd:string ; ds:hasStartTask visu:CanvasTask1 . +ds:feature_1 a ds:DataEntity ; + ds:hasDataSemantics ds:TimeSeries ; + ds:hasDataStructure ds:Vector ; + ds:hasSource "feature_1"^^xsd:string . + visu:CanvasMethod1 a visu:CanvasMethod . visu:CanvasTask1 a visu:CanvasTask ; @@ -15,7 +20,7 @@ visu:CanvasTask1 a visu:CanvasTask ; visu:hasLayout "1 2"^^ds:intPair . visu:DataInVector1_1 a visu:DataInVector ; - ds:hasReference visu:feature_1 . + ds:hasReference ds:feature_1 . visu:LineplotMethod1 a visu:LineplotMethod . @@ -25,8 +30,3 @@ visu:PlotTask1 a visu:PlotTask ; visu:hasLineStyle "-"^^xsd:string ; visu:hasLineWidth "1"^^xsd:int ; visu:hasPlotMethod visu:LineplotMethod1 . - -visu:feature_1 a ds:DataEntity ; - ds:hasDataSemantics ds:TimeSeries ; - ds:hasDataStructure ds:Vector ; - ds:hasSource "feature_1"^^xsd:string . diff --git a/examples/stats_pipeline_creation.py b/examples/stats_pipeline_creation.py index 0d1edc2..8fa4a9f 100644 --- a/examples/stats_pipeline_creation.py +++ b/examples/stats_pipeline_creation.py @@ -4,7 +4,7 @@ from exe_kg_lib import ExeKG if __name__ == "__main__": - exe_kg = ExeKG(kg_schema_name="Statistics") + exe_kg = ExeKG() my_data_entity = exe_kg.create_data_entity( name="feature_1", source_value="feature_1", @@ -19,51 +19,52 @@ ) normalization_task = exe_kg.add_task( - task_type="NormalizationTask", + kg_schema_short="stats", + task="NormalizationTask", input_data_entity_dict={"DataInNormalization": [my_data_entity]}, - method_type="NormalizationMethod", - data_properties={}, + method="NormalizationMethod", + properties_dict={}, ) norm_output = normalization_task.output_dict["DataOutNormalization"] canvas_task = exe_kg.add_task( - task_type="CanvasTask", + kg_schema_short="visu", + task="CanvasTask", input_data_entity_dict={}, - method_type="CanvasMethod", - data_properties={"hasCanvasName": "MyCanvas", "hasLayout": "1 1"}, - visualization=True, + method="CanvasMethod", + properties_dict={"hasCanvasName": "MyCanvas", "hasLayout": "1 1"}, ) feature_1_scatterplot_task = exe_kg.add_task( - task_type="PlotTask", + kg_schema_short="visu", + task="PlotTask", input_data_entity_dict={ "DataInVector": [my_data_entity], }, - method_type="ScatterplotMethod", - data_properties={ + method="ScatterplotMethod", + properties_dict={ "hasLegendName": "Feature 1 before normalization", "hasLineStyle": "o", "hasScatterStyle": "o", "hasLineWidth": 1, "hasScatterSize": 1, }, - visualization=True, ) norm_output_scatterplot_task = exe_kg.add_task( - task_type="PlotTask", + kg_schema_short="visu", + task="PlotTask", input_data_entity_dict={ "DataInVector": [norm_output], }, - method_type="ScatterplotMethod", - data_properties={ + method="ScatterplotMethod", + properties_dict={ "hasLegendName": "Normalized feature 1", "hasLineStyle": "o", "hasScatterStyle": "o", "hasLineWidth": 1, "hasScatterSize": 1, }, - visualization=True, ) exe_kg.save_created_kg(f"./pipelines/{pipeline_name}.ttl") diff --git a/examples/visu_pipeline_creation.py b/examples/visu_pipeline_creation.py index f25f92d..087135a 100644 --- a/examples/visu_pipeline_creation.py +++ b/examples/visu_pipeline_creation.py @@ -4,7 +4,7 @@ from exe_kg_lib import ExeKG if __name__ == "__main__": - exe_kg = ExeKG(kg_schema_name="Visualization") + exe_kg = ExeKG() my_data_entity = exe_kg.create_data_entity( name="feature_1", source_value="feature_1", @@ -19,10 +19,11 @@ canvas_task_properties = {"hasCanvasName": "MyCanvas", "hasLayout": "1 2"} canvas_task = exe_kg.add_task( - task_type="CanvasTask", + kg_schema_short="visu", + task="CanvasTask", input_data_entity_dict={}, - method_type="CanvasMethod", - data_properties=canvas_task_properties, + method="CanvasMethod", + properties_dict=canvas_task_properties, ) lineplot_task_properties = { @@ -31,10 +32,11 @@ "hasLineWidth": 1, } lineplot_task = exe_kg.add_task( - task_type="PlotTask", + kg_schema_short="visu", + task="PlotTask", input_data_entity_dict={"DataInVector": [my_data_entity]}, - method_type="LineplotMethod", - data_properties=lineplot_task_properties, + method="LineplotMethod", + properties_dict=lineplot_task_properties, ) exe_kg.save_created_kg(f"./pipelines/{pipeline_name}.ttl") diff --git a/exe_kg_lib/classes/entity.py b/exe_kg_lib/classes/entity.py index d522be1..5e062e1 100644 --- a/exe_kg_lib/classes/entity.py +++ b/exe_kg_lib/classes/entity.py @@ -21,7 +21,7 @@ def __init__(self, iri: str, parent_entity: Entity = None): @staticmethod def get_namespace(iri: str) -> str: - return iri.split("#")[0] + return iri.split("#")[0] + "#" @staticmethod def get_descriptor(iri: str) -> str: diff --git a/exe_kg_lib/classes/exe_kg.py b/exe_kg_lib/classes/exe_kg.py index 84b1b61..4f67d3a 100644 --- a/exe_kg_lib/classes/exe_kg.py +++ b/exe_kg_lib/classes/exe_kg.py @@ -50,16 +50,15 @@ class ExeKG: - def __init__(self, kg_schema_name: str = None, input_exe_kg_path: str = None): + def __init__(self, input_exe_kg_path: str = None): """ Args: - kg_schema_name: name of chosen bottom-level KG schema to use in case of KG construction (must be equal to one of KG_SCHEMAS keys) - acts as switch for KG construction mode (if filled, mode is on) input_exe_kg_path: path of KG to be executed acts as switch for KG execution mode (if filled, mode is on) """ self.top_level_schema = KGSchema.from_schema_info(KG_SCHEMAS["Data Science"]) # top-level KG schema + self.bottom_level_schemata = {} # top-level KG schema entities self.atomic_task = Entity(self.top_level_schema.namespace.AtomicTask) @@ -78,38 +77,33 @@ def __init__(self, kg_schema_name: str = None, input_exe_kg_path: str = None): bottom_level_schema_info_set = False # flag indicating that a bottom-level schema was found for schema_name, schema_info in KG_SCHEMAS.items(): # search for used bottom-level schema if ( - schema_name == "Data Science" or schema_name == "Visualization" + schema_name == "Data Science" # or schema_name == "Visualization" ): # skip top-level KG schema and Visualization schema that is always used continue if (schema_info["namespace_prefix"], URIRef(schema_info["namespace"])) in all_ns: # bottom-level schema found - self.bottom_level_schema = KGSchema.from_schema_info(schema_info) + self.bottom_level_schemata[schema_info["namespace_prefix"]] = KGSchema.from_schema_info(schema_info) bottom_level_schema_info_set = True - break - - visu_schema_info = KG_SCHEMAS["Visualization"] - if ( - not bottom_level_schema_info_set - and (visu_schema_info["namespace_prefix"], URIRef(visu_schema_info["namespace"])) in all_ns - ): # Visualization schema is considered the bottom-level schema ONLY IF no other bottom-level schema was found - self.bottom_level_schema = KGSchema.from_schema_info(visu_schema_info) - bottom_level_schema_info_set = True if not bottom_level_schema_info_set: # no bottom-level schema found, input executable KG is invalid print("Input executable KG did not have any bottom level KG schemas") exit(1) else: # KG construction mode - # bottom-level schema used as compatibility guide for constructing executable KG - self.bottom_level_schema = KGSchema.from_schema_info(KG_SCHEMAS[kg_schema_name]) + for schema_name, schema_info in KG_SCHEMAS.items(): # search for used bottom-level schema + if ( + schema_name == "Data Science" # or schema_name == "Visualization" + ): # skip top-level KG schema and Visualization schema that is always used + continue - self.visu_schema = KGSchema.from_schema_info( - KG_SCHEMAS["Visualization"] - ) # Visualization KG schema, always used + self.bottom_level_schemata[schema_info["namespace_prefix"]] = KGSchema.from_schema_info(schema_info) - self.input_kg += ( - self.top_level_schema.kg + self.bottom_level_schema.kg + self.visu_schema.kg - ) # combine all KG schemas in input KG + bottom_level_schemata_kgs = [kg_schema.kg for kg_schema in self.bottom_level_schemata.values()] + + self.input_kg += self.top_level_schema.kg # + self.visu_schema.kg # combine all KG schemas in input KG + + for bottom_level_schema_kg in bottom_level_schemata_kgs: + self.input_kg += bottom_level_schema_kg self.output_kg = Graph(bind_namespaces="rdflib") # KG to be filled while constructing executable KG @@ -143,14 +137,11 @@ def _bind_used_namespaces(self, kgs: List[Graph]): """ for kg in kgs: kg.bind(self.top_level_schema.namespace_prefix, self.top_level_schema.namespace) - kg.bind( - self.bottom_level_schema.namespace_prefix, - self.bottom_level_schema.namespace, - ) - kg.bind( - self.visu_schema.namespace_prefix, - self.visu_schema.namespace, - ) + for bottom_level_kg_schema in self.bottom_level_schemata.values(): + kg.bind( + bottom_level_kg_schema.namespace_prefix, + bottom_level_kg_schema.namespace, + ) def _parse_kgs(self) -> None: """ @@ -199,7 +190,6 @@ def create_pipeline_task(self, pipeline_name: str, input_data_path: str) -> Task """ pipeline = create_pipeline_task( self.top_level_schema.namespace, - self.bottom_level_schema.namespace, self.pipeline, self.output_kg, pipeline_name, @@ -227,7 +217,7 @@ def create_data_entity( DataEntity: object initialized with the given parameter values """ return DataEntity( - self.bottom_level_schema.namespace + name, + self.top_level_schema.namespace + name, self.data_entity, source_value, self.top_level_schema.namespace + data_semantics_name, @@ -236,31 +226,28 @@ def create_data_entity( def add_task( self, - task_type: str, + kg_schema_short: str, + task: str, input_data_entity_dict: Dict[str, List[DataEntity]], - method_type: str, - data_properties: Dict[str, Union[str, int, float]], - visualization: bool = False, + method: str, + properties_dict: Dict[str, Union[str, int, float]], ) -> Task: """ Instantiates and adds a new task entity to self.output_kg - Components added to the task during creation: input and output entities, and a method with data properties + Components attached to the task during creation: input and output data entities, and a method with properties Args: - task_type: type of the task - input_data_entity_dict: keys -> input entity names corresponding to the given task_type as defined in the chosen bottom-level KG schema - values -> list of corresponding data entities to be added as input to the task - method_type: type of the task's method - data_properties: keys -> data property names corresponding to the given method_type as defined in the chosen bottom-level KG schema - values -> list of corresponding values to be added as property values to the task - visualization: if True, the namespace prefix of Visualization KG schema is used during creation of the task - else, the namespace prefix of the chosen bottom-level KG schema is used + kg_schema_short: abbreviated name of the KG schema in which the task and method belong + task: task name + input_data_entity_dict: keys -> input names of the specified task + values -> lists of DataEntity objects to be added as input to the task + method: method name + properties_dict: keys -> property names of the specified method + values -> values to be added as parameters to the method Returns: Task: object of the created task """ - namespace_to_use = ( - self.visu_schema.namespace if visualization else self.bottom_level_schema.namespace - ) # use appropriate namespace for the task + kg_schema_to_use = self.bottom_level_schemata[kg_schema_short] relation_iri = ( self.top_level_schema.namespace.hasNextTask @@ -269,9 +256,9 @@ def add_task( ) # use relation depending on the previous task # instantiate task and link it with the previous one - parent_task = Task(namespace_to_use + task_type, self.atomic_task) + parent_task = Task(kg_schema_to_use.namespace + task, self.atomic_task) added_entity = add_instance_from_parent_with_relation( - self.bottom_level_schema.namespace, + kg_schema_to_use.namespace, self.output_kg, parent_task, relation_iri, @@ -281,11 +268,11 @@ def add_task( next_task = Task.from_entity(added_entity) # create Task object from Entity object # instantiate and add given input data entities to the task - self._add_inputs_to_task(next_task, input_data_entity_dict) + self._add_inputs_to_task(kg_schema_to_use.namespace, next_task, input_data_entity_dict) # instantiate and add output data entities to the task, as specified in the KG schema self._add_outputs_to_task(next_task) - method_parent = Entity(namespace_to_use + method_type, self.atomic_method) + method_parent = Entity(kg_schema_to_use.namespace + method, self.atomic_method) # fetch compatible methods and their properties from KG schema results = list( @@ -297,15 +284,15 @@ def add_task( ) chosen_property_method = next( - filter(lambda pair: pair[1].split("#")[1] == method_type, results), None + filter(lambda pair: pair[1].split("#")[1] == method, results), None ) # match given method_type with query result if chosen_property_method is None: - print(f"Property connecting task of type {task_type} with method of type {method_type} not found") + print(f"Property connecting task of type {task} with method of type {method} not found") exit(1) # instantiate method and link it with the task using the appropriate chosen_property_method[0] relation add_instance_from_parent_with_relation( - self.bottom_level_schema.namespace, + kg_schema_to_use.namespace, self.output_kg, method_parent, chosen_property_method[0], @@ -322,7 +309,7 @@ def add_task( property_name = property_iri.split("#")[1] range_iri = pair[1] input_property = Literal( - lexical_or_value=data_properties[property_name], + lexical_or_value=properties_dict[property_name], datatype=range_iri, ) add_literal(self.output_kg, next_task, property_iri, input_property) @@ -332,7 +319,10 @@ def add_task( return next_task def _add_inputs_to_task( - self, task_entity: Task, input_data_entity_dict: Dict[str, List[DataEntity]] = None + self, + namespace: Namespace, + task_entity: Task, + input_data_entity_dict: Dict[str, List[DataEntity]] = None, ) -> None: """ Instantiates and adds given input data entities to the given task of self.output_kg @@ -365,7 +355,7 @@ def _add_inputs_to_task( input_data_entity_list += get_input_for_new_data_entities( self.data_semantics_list, self.data_structure_list, - self.bottom_level_schema.namespace, + namespace, self.data_entity, ) @@ -462,7 +452,7 @@ def _create_next_task_cli(self) -> Union[None, Task]: # instantiate task and link it with the previous one task_entity = add_instance_from_parent_with_relation( - self.bottom_level_schema.namespace, + next_task_parent.namespace, self.output_kg, next_task_parent, relation_iri, @@ -473,7 +463,7 @@ def _create_next_task_cli(self) -> Union[None, Task]: task_entity = Task(task_entity.iri, task_entity.parent_entity) # create Task object from Entity object's info # instantiate and add input data entities to the task based on user input - self._add_inputs_to_task(task_entity) + self._add_inputs_to_task(next_task_parent.namespace, task_entity) # instantiate and add output data entities to the task, as specified in the KG schema self._add_outputs_to_task(task_entity) @@ -514,7 +504,7 @@ def _create_method(self, task_to_attach_to: Entity) -> None: ) # instantiate method and link it with the task using the appropriate selected_property_and_method[0] relation add_instance_from_parent_with_relation( - self.bottom_level_schema.namespace, + task_to_attach_to.namespace, self.output_kg, method_parent, selected_property_and_method[0], @@ -547,7 +537,6 @@ def start_pipeline_creation(self, pipeline_name: str, input_data_path: str) -> N """ pipeline = create_pipeline_task( self.top_level_schema.namespace, - self.bottom_level_schema.namespace, self.pipeline, self.output_kg, pipeline_name, @@ -730,23 +719,3 @@ def execute_pipeline(self): canvas_method = next_task next_task_iri = next_task.has_next_task - - @staticmethod - def input_kg_schema_name() -> str: - """ - Prompts the user to choose a schema by presenting the available schemas' names - Returns: - str: chosen schema name - """ - kg_schema_names = list(KG_SCHEMAS.keys()) - print( - "Choose a KG schema to use. Components of the Visualization schema can be used regardless of the chosen schema." - ) - for i, kg_schema_name in enumerate(kg_schema_names): - if kg_schema_name == "Data Science": - continue - print(f"{i}: {kg_schema_name}") - selected_schema_i = int(input()) - selected_schema_name = kg_schema_names[selected_schema_i] - - return selected_schema_name diff --git a/exe_kg_lib/cli/main.py b/exe_kg_lib/cli/main.py index b118345..a226fe8 100644 --- a/exe_kg_lib/cli/main.py +++ b/exe_kg_lib/cli/main.py @@ -17,9 +17,8 @@ @app.command() def create_pipeline(): pipeline_name, input_data_path = input_pipeline_info() - kg_schema_name = ExeKG.input_kg_schema_name() - exe_kg = ExeKG(kg_schema_name=kg_schema_name) + exe_kg = ExeKG() exe_kg.start_pipeline_creation(pipeline_name, input_data_path) exe_kg.save_created_kg(f"pipelines/{pipeline_name}.ttl") diff --git a/exe_kg_lib/utils/kg_creation_utils.py b/exe_kg_lib/utils/kg_creation_utils.py index 5f03fdc..ea9a00a 100644 --- a/exe_kg_lib/utils/kg_creation_utils.py +++ b/exe_kg_lib/utils/kg_creation_utils.py @@ -194,7 +194,6 @@ def add_and_attach_data_entity( def create_pipeline_task( top_level_schema_namespace: Namespace, - bottom_level_schema_namespace: Namespace, parent_entity: Entity, kg: Graph, pipeline_name: str, @@ -204,7 +203,6 @@ def create_pipeline_task( Adds instance of pipeline task to kg Args: top_level_schema_namespace: namespace of the top-level KG schema - bottom_level_schema_namespace: namespace of the bottom-level KG schema parent_entity: parent entity of pipeline instance kg: Graph object to add to pipeline_name: name for the pipeline @@ -213,7 +211,7 @@ def create_pipeline_task( Returns: Task: created pipeline task """ - pipeline = Task(bottom_level_schema_namespace + pipeline_name, parent_entity) + pipeline = Task(top_level_schema_namespace + pipeline_name, parent_entity) add_instance(kg, pipeline) input_data_path_literal = Literal(lexical_or_value=input_data_path, datatype=XSD.string) diff --git a/mkdocs.yml b/mkdocs.yml index 6012066..d910c9b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -2,7 +2,7 @@ # SPDX-License-Identifier: AGPL-3.0 site_name: "ExeKGLib" -site_author: "Mohamed Gad-Elrab" +site_author: "Antonis Klironomos & Mohamed Gad-Elrab" site_description: "Library for executable ML pipelines represented by KGs." site_url: "https://boschresearch.github.io/ExeKGLib"