-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from NeotomaDB/develop_sedv
Develop sedv
- Loading branch information
Showing
55 changed files
with
2,114 additions
and
868 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import pandas as pd | ||
import yaml | ||
|
||
def csv_to_yaml(xl_path, yml_output='output_yml.yml'):
    """
    _Convert a tabular template file into a YAML file._

    Despite its name, this function reads its input with `pandas.read_excel`.
    Every row of the sheet becomes one mapping of column name to cell value,
    and the resulting list of mappings is dumped to `yml_output`.

    Args:
        xl_path (_str_): _Excel file to be used as template_
        yml_output (_str_): _Location and file name where the yaml template will be stored_
    Returns:
        _None_: _The output file will be stored, no need to return anything here_
    """
    df = pd.read_excel(xl_path)

    # One dict per row, keyed by column name. The records are used as-is:
    # re-zipping the column names with each record would iterate the record's
    # *keys*, producing {column: column} and dropping the cell values.
    nested_data = df.to_dict(orient='records')

    # Explicit encoding so the output does not depend on the platform default.
    with open(yml_output, 'w', encoding='UTF-8') as yaml_file:
        yaml.dump(nested_data, yaml_file, default_flow_style=False)

    print(f'YAML file stored in {yml_output} successfully.')

    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import yaml | ||
from yaml.loader import SafeLoader | ||
from collections import defaultdict | ||
import itertools | ||
import pandas as pd | ||
import os | ||
import sys | ||
import argparse | ||
|
||
""" | ||
To run from command line use: | ||
python csv_validator.py /path/to/directory | ||
Example: | ||
python 210Pb_Template/neotomaUploader/csvValidator.py --path=210Pb_Template/data/ --template=210Pb_Template/template.yml | ||
""" | ||
|
||
def yml_to_dict(yml_file):
    """_Load a yaml file from disk and return its parsed content._

    Args:
        yml_file (_string_): _Path of the yaml file to read._
    Returns:
        _dict_: _The parsed content of the yaml file, as produced by the SafeLoader._
    Raises:
        FileNotFoundError: _If `yml_file` is not an existing file._
    """
    file_exists = os.path.isfile(yml_file)
    if file_exists is False:
        raise FileNotFoundError(f"The file '{yml_file}' could not be found within the current path.")

    # SafeLoader only builds plain Python objects from the document.
    with open(yml_file, encoding="UTF-8") as handle:
        parsed = yaml.load(handle, Loader=SafeLoader)

    return parsed
|
||
|
||
def csv_validator(filename, yml_data):
    """_Check that the columns of a csv file match the columns listed in the yaml template._

    Args:
        filename (_string_): _A valid csv filename._
        yml_data (_list_): _Template entries passed from yml_to_dict(); each entry is read with `.get('column')`._
    Returns:
        _list_: _Log messages (strings) describing the result of the comparison._
    Raises:
        FileNotFoundError: _If `filename` is not an existing file._
    """
    log_file = []
    # Take directly from .yml file
    col_values = [d.get('column') for d in yml_data]

    if not os.path.isfile(filename):
        raise FileNotFoundError(f"The file '{filename}' could not be found within the current path.")

    try:
        # Load csv file as data frame and extract columns
        df = pd.read_csv(filename)
    except pd.errors.ParserError as e:
        # Without a data frame there are no columns to compare, so report the
        # parse failure and stop here.
        log_file.append(f"✗ Error opening file '{filename}': {e}" + '\n')
        return log_file

    df_columns = list(df.columns)

    # Columns listed in the YAML template that the data frame lacks.
    # key=str keeps the sort from failing if an entry has no 'column' (None).
    yaml_not_in_df = sorted(set(col_values) - set(df_columns), key=str)

    # Columns present in the data frame that the YAML template does not list.
    df_not_in_yaml = sorted(set(df_columns) - set(col_values), key=str)

    # Report in the log
    if not yaml_not_in_df and not df_not_in_yaml:
        log_file.append("✔ The column names and flattened YAML keys match")
    else:
        log_file.append("✗ The column names and flattened YAML keys do not match")
        log_file.append(f"Columns from the YAML template are not in the data frame: '{yaml_not_in_df}'")
        log_file.append(f"Columns from the data frame not in the YAML template: '{df_not_in_yaml}'")

    return log_file
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.