Skip to content

Commit

Permalink
Kyle review ta3 Integration (#51)
Browse files Browse the repository at this point in the history
* updated to work with cdr

* Think this is it

* updated

* update

* update

* refactor

* schema

* updated

---------

Co-authored-by: スコット <[email protected]>
  • Loading branch information
marshHawk4 and scotthaleen authored Jul 23, 2024
1 parent 89d7903 commit 1e2a264
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 147 deletions.
43 changes: 43 additions & 0 deletions cdr_schemas/cdr_responses/prospectivity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from typing import List, Union

from pydantic import BaseModel, Field

from cdr_schemas.prospectivity_input import (
CreateCriticalMineralAssessment,
CreateDataSource,
TranformMethods,
)
from cdr_schemas.prospectivity_models import NeuralNetUserOptions, SOMTrainConfig


# CMA as used in responses: every field of CreateCriticalMineralAssessment
# plus the CMA's id and a URL for its template raster.
# (Kept as a comment, not a docstring, so the generated schema is unchanged.)
class CriticalMineralAssessment(CreateCriticalMineralAssessment):
    cma_id: str = Field(
        description="ID of the cma",
    )
    download_url: str = Field(
        description="url to view template raster",
    )


# Data source as used in responses: every field of CreateDataSource plus an
# id (empty string when not supplied) and a required download URL.
class DataSource(CreateDataSource):
    data_source_id: str = ""
    download_url: str


# Describes one layer to process: the CMA it belongs to, the data source it
# is built from, the title to give it, and the transformations to apply.
class CreateProcessDataLayer(BaseModel):
    cma: CriticalMineralAssessment = Field(
        description="CMA with all information needed for processing"
    )
    data_source: DataSource = Field(description="Data source to create this layer")
    title: str = Field(description="Title to use for processed layer")
    # TranformMethods is a List[...] alias, so the "nothing specified" default
    # must be an empty list. The previous default of "" was a str, which
    # pydantic does not validate by default and which therefore leaked a
    # non-list value to consumers. default_factory gives each instance its
    # own fresh list.
    transform_methods: TranformMethods = Field(
        default_factory=list, description="Transformation method used"
    )


class ProspectModelMetaData(BaseModel):
    """
    # CDR to TA3: EVENT
    provides a model run id, cma
    """

    # Identifier of the model run this metadata describes.
    model_run_id: str = Field(
        description="CDR id of the model run",
    )
    # Full CMA record (not just its id).
    cma: CriticalMineralAssessment = Field(
        description="CMA info",
    )
    # Training configuration: either the SOM config or the neural-net options.
    train_config: Union[SOMTrainConfig, NeuralNetUserOptions]
    # One entry per evidence layer, each carrying its data source and transforms.
    evidence_layers: List[CreateProcessDataLayer]
143 changes: 77 additions & 66 deletions cdr_schemas/prospectivity_input.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
from enum import Enum
from typing import List, Optional, Tuple, Union
from typing import List, Union

from geojson_pydantic import MultiPolygon
from pydantic import BaseModel, Field


class InterpolationType(str, Enum):
"""Enum for the possible values of type field of MapUnit"""

LINEAR = "linear"
CUBIC = "cubic"
NEAREST = "nearest"
NONE = "none"
from cdr_schemas.prospectivity_models import (
NeuralNetUserOptions,
SOMTrainConfig,
)


class ScalingType(str, Enum):
Expand All @@ -38,20 +34,6 @@ class DataFormat(str, Enum):
SHP = "shp"


class DataSource(BaseModel):
DOI: Optional[str]
authors: Optional[List[str]]
publication_date: Optional[str]
category: Optional[Union[LayerCategory, str]]
subcategory: Optional[str]
description: Optional[str]
derivative_ops: Optional[str]
type: LayerDataType
resolution: Optional[tuple]
format: DataFormat
download_url: Optional[str]


class TransformMethod(str, Enum):
LOG = "log"
ABS = "abs"
Expand All @@ -65,62 +47,91 @@ class ImputeMethod(str, Enum):

class Impute(BaseModel):
impute_method: ImputeMethod
window_size: Tuple[int, int] = Field(
default=(3, 3),
window_size: List[int] = Field(
default=[3, 3],
description="Size of window centered around pixel to be imputed.",
)


class ProcessedDataLayer(BaseModel):
title: Optional[str]
transform_method: Union[TransformMethod, Impute]
scaling_method: ScalingType
normalization_method: str # source: LayerDataType


class CriticalMineralAssessment(BaseModel):
# Payload describing a data source. Only `type` and `format` are required;
# every other field falls back to an empty string or an empty list.
class CreateDataSource(BaseModel):
    DOI: str = ""
    authors: List[str] = Field(default_factory=list)
    publication_date: str = ""
    # NOTE(review): the default "" is a plain str, not visibly a LayerCategory
    # member, and pydantic does not validate defaults unless asked to --
    # confirm that "" is an intended "uncategorised" value.
    category: LayerCategory = ""
    subcategory: str = ""
    description: str = ""
    derivative_ops: str = ""
    type: LayerDataType
    resolution: List[Union[int, float]] = Field(default_factory=list)
    format: DataFormat
    reference_url: str = ""
    evidence_layer_raster_prefix: str = ""


# TA3 -> CDR:
# Metadata TA3 can send together with the raster that is their model output.
# NOTE(review): under pydantic v2, field names starting with "model_" may
# trigger a protected-namespace warning -- confirm against the pinned version.
class ProspectivityOutputLayer(BaseModel):
    system: str
    system_version: str
    model: str = ""
    model_version: str = ""
    model_run_id: str = Field(
        description="Connect this output to a model run",
    )
    # Expected to be one of: likelihood, uncertainty (not enforced by the type).
    output_type: str
    cma_id: str = Field(
        description="id of the cma",
    )
    title: str = Field(
        description="Title for prospectivity layer",
    )


# MTRI to CDR:
# send to cdr to create new cma. Will be associated with template raster uploaded
class CreateCriticalMineralAssessment(BaseModel):
crs: str
extent: MultiPolygon
resolution: Tuple[int, int]
resolution: List[Union[float, int]]
mineral: str
description: str


class CMATemplate(BaseModel):
cma: CriticalMineralAssessment
file: str # path to template raster defining extent,crs for resampling
TranformMethods = List[Union[TransformMethod, Impute, ScalingType]]


class StackMetaData(BaseModel):
title: Optional[str] = Field(
...,
description="""
Title of the map/cog.
""",
)
year: Optional[int] = Field(
...,
description="""
Year the map was made. i.e. 2012
""",
)
crs: Optional[str] = Field(
...,
description="""
CRS of the map. i.e. "EPSG:4267"
""",
# MTRI UI -> CDR:
# Defines the preprocessing actions for one layer, referencing the CMA and
# the data source by id.
class DefineProcessDataLayer(BaseModel):
    cma_id: str = Field(
        description="ID of the cma",
    )
    data_source_id: str = Field(
        description="Data source id used to create this layer",
    )
    title: str = Field(
        description="Title to use for processed layer",
    )
    # Empty list when no transformation is requested.
    transform_methods: TranformMethods = Field(
        description="Transformation method used",
        default_factory=list,
    )
authors: Optional[List[str]] = Field(
...,
description="""
Creators of the dataset
""",


# TA3 TO CDR:
# Send along with a processed data layer used for training to support their model output.
# TA3 can send each layer of the training stack used to generate the output one layer at a time
class SaveProcessedDataLayer(BaseModel):
model_run_id: str = Field(
description="Connect this processed data layer to a model run output layer"
)
organization: Optional[str] = Field(
...,
description="""
Organization that created the map
""",
data_source_id: str = Field(description="Data source id used to create this layer")
cma_id: str = Field(description="ID of the cma")
title: str = Field(description="Title for processed layer")
system: str
system_version: str
transform_methods: TranformMethods = Field(
default="", description="Transformation method used"
)

evidence_layers: List[ProcessedDataLayer]

# MTRI UI -> CDR:
# Defines the CMA (by id), the model training config and the per-layer
# preprocessing steps.
class CreateProspectModelMetaData(BaseModel):
    cma_id: str = Field(
        description="CMA id",
    )
    system: str
    system_version: str
    # Optional provenance fields; empty string when not supplied.
    author: str = ""
    date: str = ""
    organization: str = ""
    # Either the SOM training config or the neural-net user options.
    train_config: Union[SOMTrainConfig, NeuralNetUserOptions]
    evidence_layers: List[DefineProcessDataLayer] = Field(
        description="Datasource and preprocess steps",
    )
95 changes: 15 additions & 80 deletions cdr_schemas/prospectivity_models.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,21 @@
from enum import Enum
from typing import List, Optional, Tuple, Union
from typing import Optional, Tuple

from pydantic import BaseModel, Field

from .prospectivity_input import CMATemplate, StackMetaData


class Accelerator(str, Enum):
CPU = "cpu"
GPU = "gpu"


class NeuralNetTrainConfig(BaseModel):
min_epochs: int # prevents early stopping
max_epochs: int

accelerator: Accelerator

# mixed precision for extra speed-up
precision: int

# perform a validation loop twice every training epoch
val_check_interval: float

# set True to to ensure deterministic results
# makes training slower but gives more reproducibility than just setting seeds
deterministic: bool
# User-tunable neural-net options; every field is optional and has a default.
class NeuralNetUserOptions(BaseModel):
    smoothing: Optional[float] = Field(
        description="Controls certainty of data labels. Low smoothing results in large gradients between low vs high prospectivity areas. High smoothing results in incremental gradients between low vs high prospectivity areas.",
        default=0.5,
    )
    dropout: Optional[float] = Field(
        description="Dropout influences variance of network outputs. Low dropout results in deterministic prospectivity map. High dropout results in probabilistic prospectivity map.",
        default=0.5,
    )
    # Pair of floats; no description is published for this field.
    negative_sampling_fraction: Optional[Tuple[float, float]] = Field(
        default=(0.0, 0.25),
    )


class NeighborhoodFunction(str, Enum):
Expand Down Expand Up @@ -66,10 +56,10 @@ class SOMTrainConfig(BaseModel):
dimensions_y: Optional[int] = Field(
default=20, description="Dimension of generated SOM space in y"
)
num_initializations: int = Field(
num_initializations: Optional[int] = Field(
default=5, description="Number of initializations to run"
)
num_epochs: Optional[int] = Field(default=10, description="Number of epochs to run")
num_epochs: int = Field(default=10, description="Number of epochs to run")
grid_type: Optional[SOMGrid] = Field(default=SOMGrid.RECTANGULAR)
som_type: Optional[SOMType] = Field(default=SOMType.TOROID)
som_initialization: Optional[SOMInitialization] = Field(
Expand All @@ -89,58 +79,3 @@ class SOMTrainConfig(BaseModel):
)
initial_learning_rate: Optional[float]
final_learning_rate: Optional[float]


class NeuralNetModel(BaseModel):
train_config: NeuralNetTrainConfig
pass


class SOMModel(BaseModel):
train_config: SOMTrainConfig
pass


class CMAModel(BaseModel):
title: Optional[str] = Field(
...,
description="""
Title of the model.
""",
)
date: Optional[int] = Field(
...,
description="""
Date that the model was made. i.e. 2012
""",
)
authors: Optional[List[str]] = Field(
...,
description="""
Creators of the model
""",
)
organization: Optional[str] = Field(
...,
description="""
Organization that created the model
""",
)
cma_model_type: Union[NeuralNetModel, SOMModel]

training_data: StackMetaData
cma_template: CMATemplate


class NeuralNetUserOptions(BaseModel):
smoothing: Optional[float] = Field(
default=0.5,
description="Controls certainty of data labels. Low smoothing results in large gradients between low vs high prospectivity areas. High smoothing results in incremental gradients between low vs high prospectivity areas.",
)
dropout: Optional[float] = Field(
default=0.5,
description="Dropout influences variance of network outputs. Low dropout results in deterministic prospectivity map. High dropout results in probabilistic prospectivity map.",
)
negative_sampling_fraction: Optional[Tuple[float, float]] = Field(
default=(0.0, 0.25)
)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cdr_schemas"
version = "0.3.8"
version = "0.3.9"
description = "CDR Schemas"
authors = []
readme = "README.md"
Expand Down

0 comments on commit 1e2a264

Please sign in to comment.