Skip to content

Commit

Permalink
Preprocessing Repo Additions (#68)
Browse files Browse the repository at this point in the history
* updated

* lock

* updated mineral sites

* here

* latest

* updated

* multiple vector rasters

* latest

* latest

* format

* list of processed ids

* version
  • Loading branch information
marshHawk4 authored Oct 15, 2024
1 parent 5e3eb30 commit 38db806
Show file tree
Hide file tree
Showing 4 changed files with 1,083 additions and 13 deletions.
49 changes: 47 additions & 2 deletions cdr_schemas/cdr_responses/prospectivity.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from datetime import datetime
from typing import List, Union

from geojson_pydantic import LineString, Point, Polygon
from pydantic import BaseModel, ConfigDict, Field

from cdr_schemas.prospectivity_input import (
CreateCriticalMineralAssessment,
CreateDataSource,
DataTypeId,
TranformMethods,
)
from cdr_schemas.prospectivity_models import NeuralNetUserOptions, SOMTrainConfig
Expand All @@ -26,8 +28,35 @@ class CreateProcessDataLayer(BaseModel):
data_source: DataSource = Field(description="Data source to create this layer")
title: str = Field(description="Title to use for processed layer")
transform_methods: TranformMethods = Field(
default="", description="Transformation method used"
default_factory=list, description="Transformation method used"
)
label_raster: bool = Field(description="A label layer for training")


class DataTypeIdWithGeom(DataTypeId):
    # A DataTypeId that additionally carries a geometry for the feature.

    # GeoJSON geometry: exactly one of Point, LineString or Polygon (required).
    # Spelled with typing.Union to match the other unions in this module.
    geom: Union[Point, LineString, Polygon] = Field(
        description="Adding feature coords"
    )


class CreateVectorProcessDataLayer(BaseModel):
    # Definition of a processed layer built from vector features; see the
    # per-field descriptions for what each input means.

    # Whether the layer is used to train prospectivity models (default False).
    label_raster: bool = Field(
        default=False, description="Layer used to train prospectivity models"
    )
    # Title for the processed layer (required).
    title: str = Field(description="Title to use for processed layer")
    # Feature references, each carrying its geometry (defaults to an empty list).
    evidence_features: List[DataTypeIdWithGeom] = Field(
        default_factory=list, description="Feature ids from the cdr"
    )
    # Extra Point/LineString/Polygon geometries (defaults to an empty list).
    # Spelled with typing.Union to match the other unions in this module.
    extra_geometries: List[Union[Point, LineString, Polygon]] = Field(
        default_factory=list,
        description="site locations selected by expert. Use EPSG:4326 only",
    )
    # Transformation methods for the layer (defaults to an empty list).
    transform_methods: TranformMethods = Field(
        default_factory=list, description="Transformation method used"
    )


class ProcessedDataLayer(BaseModel):
    # Reference to a processed layer: its id plus a download URL.
    # Both fields are required strings.
    layer_id: str = Field(description="Layer id")
    download_url: str = Field(description="Download url")


class ProspectModelMetaData(BaseModel):
Expand All @@ -40,6 +69,22 @@ class ProspectModelMetaData(BaseModel):
cma: CriticalMineralAssessment = Field(description="CMA info")
model_type: str
train_config: Union[SOMTrainConfig, NeuralNetUserOptions]
evidence_layers: List[CreateProcessDataLayer]
evidence_layers: List[ProcessedDataLayer] = Field(
description="Processed data layer ids."
)

model_config = ConfigDict(protected_namespaces=())


class ProcessDataLayers(BaseModel):
    # Bundles a CMA with the layer definitions to process for it.

    # The critical mineral assessment these layers belong to (required).
    cma: CriticalMineralAssessment = Field(description="CMA info")

    # Data-source based layer definitions (defaults to an empty list).
    evidence_layers: List[CreateProcessDataLayer] = Field(
        default_factory=list, description="Datasource and preprocess steps"
    )
    # Vector-feature based layer definitions (defaults to an empty list).
    vector_layers: List[CreateVectorProcessDataLayer] = Field(
        default_factory=list,
        description="Vector features and preprocess steps. EPSG:4326",
    )

    # No namespaces are protected on this model.
    model_config = ConfigDict(protected_namespaces=())
80 changes: 70 additions & 10 deletions cdr_schemas/prospectivity_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from enum import Enum
from typing import List, Optional, Union

from geojson_pydantic import MultiPolygon
from geojson_pydantic import LineString, MultiPolygon, Point, Polygon
from pydantic import BaseModel, ConfigDict, Field

from cdr_schemas.prospectivity_models import (
Expand Down Expand Up @@ -102,32 +102,73 @@ class CreateCriticalMineralAssessment(BaseModel):
# MTRI UI TO CDR:
# define preprocessing actions
class DefineProcessDataLayer(BaseModel):
# Definition of one processed data layer: source id, title, transform methods
# and whether it is a training label layer.
# NOTE(review): this text comes from a rendered diff without +/- markers, so it
# may mix removed and added lines. data_source_id is declared twice below;
# confirm against the repository which declaration is current and whether
# cma_id still belongs to this model.
cma_id: str = Field(description="ID of the cma")
data_source_id: str = Field(description="Data source id used to create this layer")
data_source_id: str = Field(
description="Processed data source id used to create this layer"
)
title: str = Field(description="Title to use for processed layer")
# Defaults to an empty list.
transform_methods: TranformMethods = Field(
default_factory=list, description="Transformation method used"
)
# Defaults to False.
label_raster: bool = Field(
default=False, description="Layer used to train prospectivity models"
)


# TA3 TO CDR:
# Send along with a processed data layer used for training to support their model output.
# TA3 can send each layer of the training stack used to generate the output one layer at a time


class RawDataType(str, Enum):
    # Closed set of raw data kinds. Members are also `str` instances, so they
    # compare equal to their plain string values.
    MINERAL_SITE = "mineral_site"
    POINT = "point"
    LINE = "line"
    POLYGON = "polygon"
    TIF = "tif"
    VECTOR = "vector"


class DataTypeId(BaseModel):
    # Pairs a feature id with the kind of raw data it refers to.

    # Kind of feature; must be one of the RawDataType members (required).
    raw_data_type: RawDataType = Field(description="Type of feature.")
    # Identifier of the feature in the CDR (required).
    id: str = Field(description="Id of feature in cdr")


class SaveProcessedDataLayer(BaseModel):
# Metadata saved with a processed data layer and linked to a model run.
# NOTE(review): this text comes from a rendered diff without +/- markers, so it
# may mix removed and added lines. `title` is declared twice and the final
# Field(...) call holds two alternative argument lines; confirm against the
# repository which lines are current (including data_source_id and cma_id).
model_run_id: str = Field(
description="Connect this processed data layer to a model run output layer"
)
data_source_id: str = Field(description="Data source id used to create this layer")
cma_id: str = Field(description="ID of the cma")
title: str = Field(description="Title for processed layer")
title: str = Field(description="Title of processed layer")
# Defaults to False.
label_raster: bool = Field(
default=False, description="Layer used to train prospectivity models"
)
# Defaults to an empty list.
raw_data_info: List[DataTypeId] = Field(
default_factory=list, description="cdr ids and types of all features used"
)
# Unparameterised List: element types are not constrained here.
extra_geometries: List = Field(
default_factory=list, description="Extra geometries used to create this layer"
)
# Required strings with no Field metadata.
system: str
system_version: str
transform_methods: TranformMethods = Field(
default="", description="Transformation method used"
default_factory=list, description="Transformation methods used"
)
# No namespaces are protected on this model.
model_config = ConfigDict(protected_namespaces=())


class DefineVectorProcessDataLayer(BaseModel):
    # Definition of a processed layer built from vector features, referenced by
    # id and type, plus optional extra geometries.

    # Whether the layer is used to train prospectivity models (default False).
    label_raster: bool = Field(
        default=False, description="Layer used to train prospectivity models"
    )
    # Title for the processed layer (required).
    title: str = Field(description="Title to use for processed layer")
    # Feature references by id and type (defaults to an empty list).
    evidence_features: List[DataTypeId] = Field(
        default_factory=list, description="cdr ids and types of all features used"
    )
    # Extra Point/LineString/Polygon geometries (defaults to an empty list).
    # Spelled with typing.Union to match the other unions in this module.
    extra_geometries: List[Union[Point, LineString, Polygon]] = Field(
        default_factory=list, description="Extra geometries to be used"
    )
    # Transformation methods for the layer (defaults to an empty list).
    transform_methods: TranformMethods = Field(
        default_factory=list, description="Transformation methods used"
    )


# MTRI UI to CDR:
# defines the cma, model training config and layer preprocessing steps
class CreateProspectModelMetaData(BaseModel):
Expand All @@ -139,9 +180,28 @@ class CreateProspectModelMetaData(BaseModel):
organization: str = ""
model_type: str
train_config: Union[SOMTrainConfig, NeuralNetUserOptions]
evidence_layers: List[str] = Field(
description="List of ids of processed data layers"
)

model_config = ConfigDict(protected_namespaces=())


# MTRI UI to CDR:
# defines the layer preprocessing steps
class CreateProcressDataLayers(BaseModel):
# Request listing the layer definitions to create for a CMA, tagged with the
# submitting system and its version.
# NOTE(review): this text comes from a rendered diff without +/- markers; the
# evidence_layers Field(...) call below holds two alternative argument lines.
# Confirm against the repository which one is current.
cma_id: str = Field(description="CMA id")
# Required strings with no Field metadata.
system: str
system_version: str

evidence_layers: List[DefineProcessDataLayer] = Field(
description="Datasource and preprocess steps"
default_factory=list, description="Datasource and preprocess steps"
)
# Defaults to an empty list.
vector_layers: List[DefineVectorProcessDataLayer] = Field(
default_factory=list,
description="A list of raster to be created using a set of vector features",
)

# No namespaces are protected on this model.
model_config = ConfigDict(protected_namespaces=())


Expand Down
Loading

0 comments on commit 38db806

Please sign in to comment.