Skip to content

Commit

Permalink
[Form Builder] Added unittests for main components
Browse files Browse the repository at this point in the history
  • Loading branch information
meta-paul committed Feb 24, 2024
1 parent af2d366 commit fc60794
Show file tree
Hide file tree
Showing 25 changed files with 2,787 additions and 52 deletions.
2 changes: 2 additions & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,8 @@ Putting it all together, let's prepare and launch a task featuring a form containi
mephisto review_app -h 0.0.0.0 -p 8000 -d True -f True
```

_Note: if a package build was terminated/failed, or related source code was changed, FormComposer needs to be rebuilt with this command: `mephisto scripts form_composer rebuild_all_apps`._

---

# Your Mephisto project
Expand Down
14 changes: 12 additions & 2 deletions mephisto/client/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import mephisto.scripts.mturk.launch_makeup_hits as launch_makeup_hits_mturk
import mephisto.scripts.mturk.print_outstanding_hit_status as print_outstanding_hit_status_mturk
import mephisto.scripts.mturk.print_outstanding_hit_status as soft_block_workers_by_mturk_id_mturk
import mephisto.scripts.form_composer.rebuild_all_apps as rebuild_all_apps_form_composer
from mephisto.client.cli_commands import get_wut_arguments
from mephisto.generators.form_composer.config_validation.task_data_config import (
create_extrapolated_config
Expand Down Expand Up @@ -223,7 +224,7 @@ def print_non_markdown_list(items: List[str]):
res += "\n * " + item
return res

VALID_SCRIPT_TYPES = ["local_db", "heroku", "metrics", "mturk"]
VALID_SCRIPT_TYPES = ["local_db", "heroku", "metrics", "mturk", "form_composer"]
if script_type is None or script_type.strip() not in VALID_SCRIPT_TYPES:
print("")
raise click.UsageError(
Expand All @@ -247,6 +248,9 @@ def print_non_markdown_list(items: List[str]):
"print_outstanding_hit_status",
"soft_block_workers_by_mturk_id",
]
FORM_COMPOSER_VALID_SCRIPTS_NAMES = [
"rebuild_all_apps",
]
script_type_to_scripts_data = {
"local_db": {
"valid_script_names": LOCAL_DB_VALID_SCRIPTS_NAMES,
Expand Down Expand Up @@ -275,10 +279,16 @@ def print_non_markdown_list(items: List[str]):
MTURK_VALID_SCRIPTS_NAMES[0]: cleanup_mturk.main,
MTURK_VALID_SCRIPTS_NAMES[1]: identify_broken_units_mturk.main,
MTURK_VALID_SCRIPTS_NAMES[2]: launch_makeup_hits_mturk.main,
MTURK_VALID_SCRIPTS_NAMES[3]: print_outstanding_hit_status_mturk.main,
MTURK_VALID_SCRIPTS_NAMES[3]: rebuild_all_apps_form_composer.main,
MTURK_VALID_SCRIPTS_NAMES[4]: soft_block_workers_by_mturk_id_mturk.main,
},
},
"form_composer": {
"valid_script_names": FORM_COMPOSER_VALID_SCRIPTS_NAMES,
"scripts": {
FORM_COMPOSER_VALID_SCRIPTS_NAMES[0]: rebuild_all_apps_form_composer.main,
},
},
}

if script_name is None or (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,15 @@ def _duplicate_values_exist(unique_names: UniqueAttrsType, errors: List[str]) ->
return is_valid


def validate_form_config(config_json: dict) -> Tuple[bool, List[str]]:
def validate_form_config(config_data: dict) -> Tuple[bool, List[str]]:
is_valid = True
errors = []

if not isinstance(config_json, dict):
if not isinstance(config_data, dict):
is_valid = False
errors.append("Form config must be a key/value JSON Object.")

elif config_json.keys() != AVAILABLE_CONFIG_ATTRS.keys():
elif config_data.keys() != AVAILABLE_CONFIG_ATTRS.keys():
is_valid = False
errors.append(
f"Form config must contain only these attributes: "
Expand All @@ -77,10 +77,10 @@ def validate_form_config(config_json: dict) -> Tuple[bool, List[str]]:
unique_names: UniqueAttrsType = {}

# Add main config level
items_to_validate.append((config_json, "Config", AVAILABLE_CONFIG_ATTRS))
items_to_validate.append((config_data, "Config", AVAILABLE_CONFIG_ATTRS))

# Add form
form = config_json["form"]
form = config_data["form"]
items_to_validate.append((form, "form", AVAILABLE_FORM_ATTRS))
_collect_values_for_unique_attrs_from_item(form, unique_names)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from botocore.exceptions import BotoCoreError
from botocore.exceptions import ClientError
from botocore.exceptions import NoCredentialsError
from rich import print

from mephisto.generators.form_composer.constants import TOKEN_END_SYMBOLS
from mephisto.generators.form_composer.constants import TOKEN_START_SYMBOLS
Expand All @@ -22,17 +23,17 @@


def validate_separate_token_values_config(
config_json: Dict[str, List[str]],
config_data: Dict[str, List[str]],
) -> Tuple[bool, List[str]]:
is_valid = True
errors = []

if not isinstance(config_json, dict):
if not isinstance(config_data, dict):
is_valid = False
errors.append("Config must be a key/value JSON Object.")
return is_valid, errors

for i, token_values in enumerate(config_json.items()):
for i, token_values in enumerate(config_data.items()):
token, values = token_values

if not values:
Expand All @@ -48,7 +49,7 @@ def validate_separate_token_values_config(
def update_separate_token_values_config_with_file_urls(
url: str,
separate_token_values_config_path: str,
use_presigned_urls: bool,
use_presigned_urls: bool = False,
):
try:
files_locations = get_file_urls_from_s3_storage(url)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,15 @@ def _set_tokens_in_form_config_item(item: dict, tokens_values: dict):
def _collect_form_config_items_to_extrapolate(config_data: dict) -> List[dict]:
items_to_extrapolate = []

if not isinstance(config_data, dict):
return items_to_extrapolate

form = config_data["form"]
items_to_extrapolate.append(form)

submit_button = form["submit_button"]
items_to_extrapolate.append(submit_button)

sections = form["sections"]
for section in sections:
items_to_extrapolate.append(section)
Expand Down Expand Up @@ -125,7 +131,7 @@ def _extrapolate_tokens_in_form_config(config_data: dict, tokens_values: dict) -


def _validate_tokens_in_both_configs(
form_config_data, token_sets_values_config_data,
form_config_data: dict, token_sets_values_config_data: List[dict],
) -> Tuple[set, set, list]:
tokens_from_form_config, tokens_in_unexpected_attrs_errors = (
_collect_tokens_from_form_config(form_config_data)
Expand Down Expand Up @@ -245,21 +251,21 @@ def create_extrapolated_config(
exit()


def validate_task_data_config(config_json: List[dict]) -> Tuple[bool, List[str]]:
def validate_task_data_config(config_data: List[dict]) -> Tuple[bool, List[str]]:
is_valid = True
errors = []

if not isinstance(config_json, list):
if not isinstance(config_data, list):
is_valid = False
errors.append("Config must be a JSON Array.")

if config_json:
if not all(config_json):
if config_data:
if not all(config_data):
is_valid = False
errors.append("Task data config must contain at least one non-empty item.")

# Validate each form version contained in task data config
for item in config_json:
for item in config_data:
form_config_is_valid, form_config_errors = validate_form_config(item)
if not form_config_is_valid:
is_valid = False
Expand Down Expand Up @@ -375,11 +381,13 @@ def verify_form_composer_configs(
print(f"\n[red]Provided Form Composer config files are invalid:[/red] {e}\n")


def prepare_task_config_for_review_app(config: dict) -> dict:
config = deepcopy(config)
def prepare_task_config_for_review_app(config_data: dict) -> dict:
config_data = deepcopy(config_data)

procedure_code_regex = r"\s*(.+?)\s*"
tokens_from_inputs, _ = _collect_tokens_from_form_config(config, regex=procedure_code_regex)
tokens_from_inputs, _ = _collect_tokens_from_form_config(
config_data, regex=procedure_code_regex,
)

url_from_rpocedure_code_regex = r"\(\"(.+?)\"\)"
token_values = {}
Expand All @@ -396,5 +404,5 @@ def prepare_task_config_for_review_app(config: dict) -> dict:
presigned_url = get_s3_presigned_url(url, S3_URL_EXPIRATION_MINUTES_MAX)
token_values[token] = presigned_url

prepared_config = _extrapolate_tokens_in_form_config(config, token_values)
prepared_config = _extrapolate_tokens_in_form_config(config_data, token_values)
return prepared_config
28 changes: 14 additions & 14 deletions mephisto/generators/form_composer/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,22 @@

CONTENTTYPE_BY_EXTENSION = {
# Docs
'csv': 'text/csv',
'doc': 'application/msword',
'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'pdf': 'application/pdf',
"csv": "text/csv",
"doc": "application/msword",
"docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"pdf": "application/pdf",
# Images
'bmp': 'image/bmp',
'gif': 'image/gif',
'heic': 'image/heic',
'heif': 'image/heif',
'jpeg': 'image/jpeg',
'jpg': 'image/jpeg',
'png': 'image/png',
"bmp": "image/bmp",
"gif": "image/gif",
"heic": "image/heic",
"heif": "image/heif",
"jpeg": "image/jpeg",
"jpg": "image/jpeg",
"png": "image/png",
# Videos
'mkv': 'video/x-matroska',
'mp4': 'video/mp4',
'webm': 'video/webm',
"mkv": "video/x-matroska",
"mp4": "video/mp4",
"webm": "video/webm",
}

JSON_IDENTATION = 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from flask import Response
from flask import send_from_directory
from flask.views import MethodView
from werkzeug.exceptions import NotFound

from mephisto.data_model.agent import Agent
from mephisto.data_model.unit import Unit
Expand Down Expand Up @@ -55,5 +56,10 @@ def get(
if filename_by_original_name:
filename = filename_by_original_name

unit_data_folder = unit.get_assigned_agent().get_data_dir()
agent = unit.get_assigned_agent()
if not agent:
app.logger.debug(f"No agent found for {unit}")
raise NotFound("File not found")

unit_data_folder = agent.get_data_dir()
return send_from_directory(unit_data_folder, filename)
11 changes: 7 additions & 4 deletions mephisto/review_app/server/api/views/units_details_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ def get(self) -> dict:
task_run: TaskRun = unit.get_task_run()
has_task_source_review = bool(task_run.args.get("blueprint").get("task_source_review"))

inputs = unit_data.get("data", {}).get("inputs")
outputs = unit_data.get("data", {}).get("outputs")
inputs = unit_data.get("data", {}).get("inputs", {})
outputs = unit_data.get("data", {}).get("outputs", {})

# In case there is outdated code that returns `final_submission`
# under `inputs` and `outputs` keys, we should use the value inside `final_submission`
Expand All @@ -67,9 +67,12 @@ def get(self) -> dict:

# Perform any dynamic action on task config for current unit
# to make it look the same as it did for a worker
prepared_inputs = prepare_task_config_for_review_app(inputs)
prepared_inputs = inputs
if "form" in inputs:
prepared_inputs = prepare_task_config_for_review_app(inputs)

unit_data_folder = unit.get_assigned_agent().get_data_dir()
agent = unit.get_assigned_agent()
unit_data_folder = agent.get_data_dir() if agent else None

units.append(
{
Expand Down
4 changes: 4 additions & 0 deletions mephisto/scripts/form_composer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
Loading

0 comments on commit fc60794

Please sign in to comment.