-
Notifications
You must be signed in to change notification settings - Fork 43
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Automate validation of pipeline configs #132
Changes from all commits
1cfd4bb
c3bd917
18b4741
3090e65
ea70ff2
fb0b247
4e03858
63630be
39016a8
1be5c89
b430fa7
f136462
93f9ca0
1f39d94
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,3 +12,4 @@ pytest-asyncio | |
pytest-cov | ||
pytest-html | ||
tox>=4.4.2,<5 | ||
jsonschema |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#!/usr/bin/env python | ||
|
||
# Standard | ||
import glob | ||
import json | ||
import sys | ||
|
||
# Third Party | ||
from jsonschema import validate | ||
import jsonschema | ||
import yaml | ||
|
||
|
||
def validate_yaml_file(yaml_file, schema):
    """Validate one pipeline config YAML file against a JSON schema.

    Args:
        yaml_file: Path of the YAML file to load and check.
        schema: Parsed JSON schema (a dict) handed to ``jsonschema.validate``.

    Returns:
        True when the file parses as YAML and conforms to the schema;
        False when it is malformed YAML or fails schema validation.
        The outcome is printed in both cases.
    """
    try:
        # Explicit encoding so the result does not depend on the locale.
        with open(yaml_file, "r", encoding="utf-8") as file:
            pipeline = yaml.safe_load(file)
    except yaml.YAMLError as err:
        # A syntactically broken file is reported as a validation failure
        # rather than raised, so a caller looping over files can continue.
        print(f"Validation failed for {yaml_file}:", err)
        return False

    try:
        validate(instance=pipeline, schema=schema)
    except jsonschema.exceptions.ValidationError as err:
        print(f"Validation failed for {yaml_file}:", err)
        return False

    print(f"Validation successful for {yaml_file}.")
    return True
|
||
|
||
def main(argv=None):
    """Validate pipeline config YAML files against the v1 pipeline schema.

    Args:
        argv: Optional list of YAML file paths to validate. When None,
            ``sys.argv[1:]`` is used. When no paths are given at all, every
            ``*.yaml`` file under ``src/instructlab/sdg/pipelines`` is
            validated.

    Returns:
        0 when every file validated successfully, 1 otherwise. The value is
        intended to be used as the process exit status.
    """
    if argv is None:
        argv = sys.argv[1:]

    schema_path = "src/instructlab/sdg/pipelines/schema/v1.json"
    with open(schema_path, "r", encoding="utf-8") as file:
        schema = json.load(file)

    # Review note (from the PR discussion): load these paths from a
    # command-line argument so the tool can validate custom pipeline configs,
    # and keep the built-in glob as the default when no argument is passed.
    if argv:
        yaml_files = list(argv)
    else:
        yaml_files = glob.glob(
            "src/instructlab/sdg/pipelines/**/*.yaml", recursive=True
        )

    if not yaml_files:
        # Make an empty run visible instead of silently reporting success.
        print("No pipeline configs found to validate.", file=sys.stderr)

    all_valid = True
    for yaml_file in yaml_files:
        print("=======================================================")
        print("=== Validating", yaml_file)
        print("=======================================================")
        if not validate_yaml_file(yaml_file, schema):
            # Keep going so every failing file is reported in one run.
            all_valid = False

    return 0 if all_valid else 1
|
||
|
||
# Script entry point: run main() and exit with the status it returns.
if __name__ == "__main__":
    raise SystemExit(main())
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,260 @@ | ||
{ | ||
"type": "object", | ||
"additionalProperties": false, | ||
"required": ["version", "blocks"], | ||
"properties": { | ||
"version": { | ||
"type": "string" | ||
}, | ||
"blocks": { | ||
"type": "array", | ||
"additionalProperties": false, | ||
"items": { | ||
"type": "object", | ||
"additionalProperties": false, | ||
"required": ["name", "type", "config"], | ||
"properties": { | ||
"name": { | ||
"type": "string" | ||
}, | ||
"type": { | ||
"type": "string" | ||
}, | ||
"drop_duplicates": { | ||
"type": "array", | ||
"items": { | ||
"type": "string" | ||
} | ||
}, | ||
"drop_columns": { | ||
"type": "array", | ||
"items": { | ||
"type": "string" | ||
} | ||
}, | ||
"gen_kwargs": { | ||
"type": "object", | ||
"properties": { | ||
markmc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"model": { | ||
"type": "string" | ||
}, | ||
"max_tokens": { | ||
"type": "number" | ||
}, | ||
"temperature": { | ||
"type": "number" | ||
}, | ||
"n": { | ||
"type": "number" | ||
}, | ||
"extra_body": { | ||
"type": "object" | ||
} | ||
} | ||
}, | ||
"config": { | ||
"anyOf": [ | ||
{ | ||
"type": "object", | ||
"description": "ImportBlock", | ||
"required": ["path"], | ||
"additionalProperties": false, | ||
"properties": { | ||
"path": { | ||
"type": "string" | ||
} | ||
} | ||
}, | ||
{ | ||
"type": "object", | ||
"description": "FilterByValueBlock", | ||
"required": ["filter_column", "filter_value", "operation"], | ||
"additionalProperties": false, | ||
"properties": { | ||
"convert_dtype": { | ||
"type": "string", | ||
"enum": ["float", "int", "bool"] | ||
}, | ||
"filter_column": { | ||
"type": "string" | ||
}, | ||
"filter_value": { | ||
"oneOf": [ | ||
{ | ||
"type": "string" | ||
}, | ||
{ | ||
"type": "number" | ||
}, | ||
{ | ||
"type": "array", | ||
"items": { | ||
"oneOf": [ | ||
{ | ||
"type": "string" | ||
}, | ||
{ | ||
"type": "number" | ||
} | ||
] | ||
} | ||
} | ||
] | ||
}, | ||
"operation": { | ||
"type": "string", | ||
"enum": ["eq", "ne", "gt", "ge", "lt", "le", "contains"] | ||
} | ||
} | ||
}, | ||
{ | ||
"type": "object", | ||
"description": "LLMBlock", | ||
markmc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"required": ["config_path", "output_cols"], | ||
"additionalProperties": false, | ||
"properties": { | ||
"config_path": { | ||
"type": "string" | ||
}, | ||
"output_cols": { | ||
"type": "array", | ||
"items": { | ||
"type": "string" | ||
} | ||
}, | ||
"parser_kwargs": { | ||
"type": "object", | ||
"properties": { | ||
"parser_name": { | ||
"type": "string" | ||
}, | ||
"parsing_pattern": { | ||
"type": "string" | ||
}, | ||
"parser_cleanup_tags": { | ||
"type": "array", | ||
"items": { | ||
"type": "string" | ||
} | ||
} | ||
} | ||
}, | ||
"batch_kwargs": { | ||
"type": "object", | ||
"properties": { | ||
"num_samples": { | ||
"type": "number" | ||
} | ||
} | ||
} | ||
} | ||
}, | ||
{ | ||
"type": "object", | ||
"description": "ConditionalLLMBlock", | ||
"required": ["config_paths", "output_cols", "selector_column_name"], | ||
"additionalProperties": false, | ||
"properties": { | ||
"config_paths": { | ||
"type": "array", | ||
"items": { | ||
"type": "string" | ||
} | ||
}, | ||
"output_cols": { | ||
"type": "array", | ||
"items": { | ||
"type": "string" | ||
} | ||
}, | ||
"selector_column_name": { | ||
"type": "string" | ||
}, | ||
"parser_kwargs": { | ||
"type": "object", | ||
"properties": { | ||
"parser_name": { | ||
"type": "string" | ||
}, | ||
"parsing_pattern": { | ||
"type": "string" | ||
}, | ||
"parser_cleanup_tags": { | ||
"type": "array", | ||
"items": { | ||
"type": "string" | ||
} | ||
} | ||
} | ||
}, | ||
"batch_kwargs": { | ||
"type": "object", | ||
"properties": { | ||
"num_samples": { | ||
"type": "number" | ||
} | ||
} | ||
} | ||
} | ||
}, | ||
{ | ||
"type": "object", | ||
"description": "SamplePopulatorBlock", | ||
"additionalProperties": false, | ||
"required": ["config_paths", "column_name"], | ||
"properties": { | ||
"config_paths": { | ||
"type": "array", | ||
"items": { | ||
"type": "string" | ||
} | ||
}, | ||
"column_name": { | ||
"type": "string" | ||
}, | ||
"post_fix": { | ||
"type": "string" | ||
} | ||
} | ||
}, | ||
{ | ||
"type": "object", | ||
"description": "SelectorBlock", | ||
"additionalProperties": false, | ||
"required": ["choice_map", "choice_col", "output_col"], | ||
"properties": { | ||
"choice_map": { | ||
"type": "object" | ||
}, | ||
"choice_col": { | ||
"type": "string" | ||
}, | ||
"output_col": { | ||
"type": "string" | ||
} | ||
} | ||
}, | ||
{ | ||
"type": "object", | ||
"description": "CombineColumnsBlock", | ||
"additionalProperties": false, | ||
"required": ["columns", "output_col"], | ||
"properties": { | ||
"output_col": { | ||
"type": "string" | ||
}, | ||
"columns": { | ||
"type": "array", | ||
"items": { | ||
"type": "string" | ||
} | ||
} | ||
} | ||
} | ||
] | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about loading this from the
instructlab.sdg
package?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, that would be better