diff --git a/mlprogram/__init__.py b/mlprogram/__init__.py index b836967..d702714 100644 --- a/mlprogram/__init__.py +++ b/mlprogram/__init__.py @@ -1 +1,19 @@ +import mlprogram.actions as actions # noqa import mlprogram.builtins as builtins # noqa +import mlprogram.collections as collections # noqa +import mlprogram.datasets as datasets # noqa +import mlprogram.distributed as distributed # noqa +import mlprogram.encoders as encoders # noqa +import mlprogram.entrypoint as entrypoint # noqa +import mlprogram.functools as functools # noqa +import mlprogram.logging as logging # noqa +# languages +import mlprogram.metrics as metrics # noqa +import mlprogram.pytorch_pfn_extras as pytorch_pfn_extras # noqa +import mlprogram.random as random # noqa +import mlprogram.transpyle as transpyle # noqa +# nn +# samplers +# synthesizers +# transforms +# utils diff --git a/mlprogram/actions/action.py b/mlprogram/actions/action.py index 70ddc9c..3443717 100644 --- a/mlprogram/actions/action.py +++ b/mlprogram/actions/action.py @@ -2,7 +2,7 @@ from enum import Enum from typing import Any, Generic, List, Optional, Tuple, TypeVar, Union -from mlprogram.languages import Root +from mlprogram.languages.ast import Root Value = TypeVar("Value") diff --git a/mlprogram/actions/action_sequence.py b/mlprogram/actions/action_sequence.py index a0cefd1..3bec47d 100644 --- a/mlprogram/actions/action_sequence.py +++ b/mlprogram/actions/action_sequence.py @@ -13,7 +13,7 @@ NodeType, Rule, ) -from mlprogram.languages import AST, Field, Leaf, Node, Root +from mlprogram.languages.ast import AST, Field, Leaf, Node, Root logger = logging.Logger(__name__) diff --git a/mlprogram/datasets/__init__.py b/mlprogram/datasets/__init__.py index 273e462..8b45f54 100644 --- a/mlprogram/datasets/__init__.py +++ b/mlprogram/datasets/__init__.py @@ -1,4 +1,5 @@ -import os - -DEFAULT_CACHE_DIR = \ - os.path.join(os.environ["HOME"], ".cache", "mlprogram", "datasets") +from mlprogram.datasets import deepfix # noqa +from mlprogram.datasets import django # noqa +from mlprogram.datasets import hearthstone # noqa +from mlprogram.datasets import nl2bash # noqa +from mlprogram.datasets.constants import DEFAULT_CACHE_DIR # noqa diff --git a/mlprogram/datasets/constants.py b/mlprogram/datasets/constants.py new file mode 100644 index 0000000..5ab45fb --- /dev/null +++ b/mlprogram/datasets/constants.py @@ -0,0 +1,3 @@ +import os + +DEFAULT_CACHE_DIR = os.path.join(os.environ["HOME"], ".cache", "mlprogram", "datasets") diff --git a/mlprogram/datasets/deepfix/download.py b/mlprogram/datasets/deepfix/download.py index 7d98ba6..462d0a2 100644 --- a/mlprogram/datasets/deepfix/download.py +++ b/mlprogram/datasets/deepfix/download.py @@ -8,10 +8,10 @@ from typing import Callable from mlprogram import logging -from mlprogram.builtins import Environment -from mlprogram.datasets import DEFAULT_CACHE_DIR -from mlprogram.functools import file_cache -from mlprogram.utils.data import ListDataset +from mlprogram.builtins.datatypes import Environment +from mlprogram.datasets.constants import DEFAULT_CACHE_DIR +from mlprogram.functools.cache import file_cache +from mlprogram.utils.data.utils import ListDataset logger = logging.Logger(__name__) @@ -19,8 +19,7 @@ def default_get(src: str, dst: str): - with urllib.request.urlopen(src) as src_file, \ - open(dst, "wb") as dst_file: + with urllib.request.urlopen(src) as src_file, open(dst, "wb") as dst_file: copyfileobj(src_file, dst_file) diff --git a/mlprogram/datasets/deepfix/lexer.py b/mlprogram/datasets/deepfix/lexer.py index 6fde29a..3cc4ded 100644 --- a/mlprogram/datasets/deepfix/lexer.py +++ b/mlprogram/datasets/deepfix/lexer.py @@ -44,8 +44,8 @@ from typing import List, Optional, Tuple from mlprogram import logging -from mlprogram.languages import Lexer as BaseLexer -from mlprogram.languages import Token +from mlprogram.languages.lexer import Lexer as BaseLexer +from mlprogram.languages.token import Token logger = logging.Logger(__name__) diff --git a/mlprogram/datasets/django/download.py b/mlprogram/datasets/django/download.py index 567d9f0..b4eaf07 100644 --- a/mlprogram/datasets/django/download.py +++ b/mlprogram/datasets/django/download.py @@ -4,11 +4,11 @@ import requests from mlprogram import logging -from mlprogram.builtins import Environment -from mlprogram.datasets import DEFAULT_CACHE_DIR +from mlprogram.builtins.datatypes import Environment +from mlprogram.datasets.constants import DEFAULT_CACHE_DIR from mlprogram.datasets.django.format_annotations import format_annotations -from mlprogram.functools import file_cache -from mlprogram.utils.data import ListDataset +from mlprogram.functools.cache import file_cache +from mlprogram.utils.data.utils import ListDataset logger = logging.Logger(__name__) @@ -29,8 +29,7 @@ def download(cache_path: str = os.path.join(DEFAULT_CACHE_DIR, "django.pt"), @file_cache(cache_path) def _download(): return { - "annotation": format_annotations( - get(BASE_PATH + "all.anno").split("\n")), + "annotation": format_annotations(get(BASE_PATH + "all.anno").split("\n")), "code": get(BASE_PATH + "all.code").split("\n") } data = _download() diff --git a/mlprogram/datasets/django/functions.py b/mlprogram/datasets/django/functions.py index 3e9f356..41bf635 100644 --- a/mlprogram/datasets/django/functions.py +++ b/mlprogram/datasets/django/functions.py @@ -3,7 +3,7 @@ from nltk import tokenize -from mlprogram.languages import Token +from mlprogram.languages.token import Token tokenizer = tokenize.WhitespaceTokenizer() @@ -52,8 +52,7 @@ def placeholder(id: int) -> str: else: reference.append(Token[str, str](None, word, word)) - vars = list(filter(lambda x: len(x) > 0, - word.split('.'))) # split by '.' + vars = list(filter(lambda x: len(x) > 0, word.split('.'))) # split by '.' if len(vars) > 1: for v in vars: reference.append(Token(None, v, v)) diff --git a/mlprogram/datasets/django/parser.py b/mlprogram/datasets/django/parser.py index 6437b80..35dfc16 100644 --- a/mlprogram/datasets/django/parser.py +++ b/mlprogram/datasets/django/parser.py @@ -51,8 +51,8 @@ import re from typing import Callable, List, Optional -from mlprogram.languages import AST -from mlprogram.languages.python import Parser as BaseParser +from mlprogram.languages.ast import AST +from mlprogram.languages.python.parser import Parser as BaseParser p_elif = re.compile(r'^elif\s?') p_else = re.compile(r'^else\s?') diff --git a/mlprogram/datasets/hearthstone/download.py b/mlprogram/datasets/hearthstone/download.py index 5216ecf..1b66144 100644 --- a/mlprogram/datasets/hearthstone/download.py +++ b/mlprogram/datasets/hearthstone/download.py @@ -4,10 +4,10 @@ import requests from mlprogram import logging -from mlprogram.builtins import Environment -from mlprogram.datasets import DEFAULT_CACHE_DIR -from mlprogram.functools import file_cache -from mlprogram.utils.data import ListDataset +from mlprogram.builtins.datatypes import Environment +from mlprogram.datasets.constants import DEFAULT_CACHE_DIR +from mlprogram.functools.cache import file_cache +from mlprogram.utils.data.utils import ListDataset logger = logging.Logger(__name__) @@ -19,8 +19,7 @@ def default_get(path: str) -> str: return requests.get(path).text -def download(cache_path: str = os.path.join(DEFAULT_CACHE_DIR, - "hearthstone.pt"), +def download(cache_path: str = os.path.join(DEFAULT_CACHE_DIR, "hearthstone.pt"), base_path: str = BASE_PATH, get: Callable[[str], str] = default_get) \ -> Dict[str, ListDataset]: diff --git a/mlprogram/datasets/hearthstone/functions.py b/mlprogram/datasets/hearthstone/functions.py index 1e2f876..d8e17ac 100644 --- a/mlprogram/datasets/hearthstone/functions.py +++ b/mlprogram/datasets/hearthstone/functions.py @@ -1,6 +1,6 @@ from typing import List, Optional -from mlprogram.languages import Token +from mlprogram.languages.token import Token class TokenizeQuery: diff --git a/mlprogram/datasets/nl2bash/download.py b/mlprogram/datasets/nl2bash/download.py index 1fd6e63..5c8402a 100755 --- a/mlprogram/datasets/nl2bash/download.py +++ b/mlprogram/datasets/nl2bash/download.py @@ -4,10 +4,10 @@ from typing import Dict from mlprogram import logging -from mlprogram.builtins import Environment -from mlprogram.datasets import DEFAULT_CACHE_DIR -from mlprogram.functools import file_cache -from mlprogram.utils.data import ListDataset +from mlprogram.builtins.datatypes import Environment +from mlprogram.datasets.constants import DEFAULT_CACHE_DIR +from mlprogram.functools.cache import file_cache +from mlprogram.utils.data.utils import ListDataset logger = logging.Logger(__name__) diff --git a/mlprogram/datasets/nl2bash/functions.py b/mlprogram/datasets/nl2bash/functions.py index 847bf2c..8e4a945 100644 --- a/mlprogram/datasets/nl2bash/functions.py +++ b/mlprogram/datasets/nl2bash/functions.py @@ -3,7 +3,7 @@ from nltk import tokenize -from mlprogram.languages import Token +from mlprogram.languages.token import Token tokenizer = tokenize.WhitespaceTokenizer() diff --git a/mlprogram/encoders/action_sequence_encoder.py b/mlprogram/encoders/action_sequence_encoder.py index 143359b..010f6c3 100644 --- a/mlprogram/encoders/action_sequence_encoder.py +++ b/mlprogram/encoders/action_sequence_encoder.py @@ -5,8 +5,7 @@ from torchnlp.encoders import LabelEncoder from mlprogram import logging -from mlprogram.actions import ( - ActionSequence, +from mlprogram.actions.action import ( ApplyRule, CloseVariadicFieldRule, ExpandTreeRule, @@ -14,7 +13,8 @@ NodeType, Rule, ) -from mlprogram.languages import Token +from mlprogram.actions.action_sequence import ActionSequence +from mlprogram.languages.token import Token logger = logging.Logger(__name__) diff --git a/mlprogram/entrypoint/__init__.py b/mlprogram/entrypoint/__init__.py index ee41d33..b2ca71a 100644 --- a/mlprogram/entrypoint/__init__.py +++ b/mlprogram/entrypoint/__init__.py @@ -1,3 +1,4 @@ +from mlprogram.entrypoint import modules # noqa from mlprogram.entrypoint.evaluate import EvaluateSample # noqa from mlprogram.entrypoint.evaluate import EvaluateSynthesizer # noqa from mlprogram.entrypoint.evaluate import evaluate # noqa diff --git a/mlprogram/entrypoint/configs.py b/mlprogram/entrypoint/configs.py index f1b78c3..82bec44 100644 --- a/mlprogram/entrypoint/configs.py +++ b/mlprogram/entrypoint/configs.py @@ -5,7 +5,7 @@ from pytorch_pfn_extras.config import Config from mlprogram.entrypoint.types import types -from mlprogram.functools import file_cache +from mlprogram.functools.cache import file_cache def with_file_cache(path, config, types): diff --git a/mlprogram/entrypoint/evaluate.py b/mlprogram/entrypoint/evaluate.py index ec954b4..2d1f0d4 100644 --- a/mlprogram/entrypoint/evaluate.py +++ b/mlprogram/entrypoint/evaluate.py @@ -12,9 +12,9 @@ from tqdm import tqdm from mlprogram import distributed, logging -from mlprogram.builtins import Environment -from mlprogram.synthesizers import Synthesizer -from mlprogram.utils.data import ListDataset +from mlprogram.builtins.datatypes import Environment +from mlprogram.synthesizers.synthesizer import Synthesizer +from mlprogram.utils.data.utils import ListDataset logger = logging.Logger(__name__) diff --git a/mlprogram/entrypoint/modules/__init__.py b/mlprogram/entrypoint/modules/__init__.py index e69de29..179d188 100644 --- a/mlprogram/entrypoint/modules/__init__.py +++ b/mlprogram/entrypoint/modules/__init__.py @@ -0,0 +1,4 @@ +import mlprogram.entrypoint.modules.fairseq as fairseq # noqa +import mlprogram.entrypoint.modules.numpy as numpy # noqa +import mlprogram.entrypoint.modules.torch as torch # noqa +import mlprogram.entrypoint.modules.torchnlp as torchnlp # noqa diff --git a/mlprogram/entrypoint/modules/fairseq.py b/mlprogram/entrypoint/modules/fairseq.py index 576e5f1..a24c6ab 100644 --- a/mlprogram/entrypoint/modules/fairseq.py +++ b/mlprogram/entrypoint/modules/fairseq.py @@ -1,5 +1,9 @@ -import fairseq.optim +try: + import fairseq.optim -types = { - "fairseq.optim.Adafactor": lambda: fairseq.optim.adafactor.Adafactor -} + types = { + "fairseq.optim.Adafactor": lambda: fairseq.optim.adafactor.Adafactor + } +except: # noqa + types = {} + pass diff --git a/mlprogram/entrypoint/train.py b/mlprogram/entrypoint/train.py index 2f0957f..f5ff319 100644 --- a/mlprogram/entrypoint/train.py +++ b/mlprogram/entrypoint/train.py @@ -10,9 +10,9 @@ from torch.utils.data import DataLoader from mlprogram import distributed, logging -from mlprogram.builtins import Environment +from mlprogram.builtins.datatypes import Environment from mlprogram.pytorch_pfn_extras import SaveTopKModel, StopByThreshold -from mlprogram.synthesizers import Synthesizer +from mlprogram.synthesizers.synthesizer import Synthesizer logger = logging.Logger(__name__) diff --git a/mlprogram/entrypoint/types.py b/mlprogram/entrypoint/types.py index 21bcdf6..8e16e1b 100644 --- a/mlprogram/entrypoint/types.py +++ b/mlprogram/entrypoint/types.py @@ -28,16 +28,11 @@ import mlprogram.transforms.pbe import mlprogram.transforms.text import mlprogram.utils.data +from mlprogram.entrypoint.modules.fairseq import types as fairseq_types from mlprogram.entrypoint.modules.numpy import types as numpy_types from mlprogram.entrypoint.modules.torch import types as torch_types from mlprogram.entrypoint.modules.torchnlp import types as torchnlp_types -try: - from mlprogram.entrypoint.modules.fairseq import types as fairseq_types -except: # noqa - fairseq_types = {} - pass - types = { "select": lambda key, options: options[key], diff --git a/mlprogram/metrics/error_correct_rate.py b/mlprogram/metrics/error_correct_rate.py index faed4a9..7e17b1b 100644 --- a/mlprogram/metrics/error_correct_rate.py +++ b/mlprogram/metrics/error_correct_rate.py @@ -2,7 +2,8 @@ from torch import nn -from mlprogram.languages import Analyzer, Interpreter +from mlprogram.languages.analyzer import Analyzer +from mlprogram.languages.interpreter import Interpreter Code = TypeVar("Code") Error = TypeVar("Error") diff --git a/mlprogram/metrics/metric.py b/mlprogram/metrics/metric.py index cec34ae..0a9daa2 100644 --- a/mlprogram/metrics/metric.py +++ b/mlprogram/metrics/metric.py @@ -3,9 +3,11 @@ from torch import nn -from mlprogram.builtins import Apply, Environment, Pick -from mlprogram.functools import Sequence -from mlprogram.nn import Function +from mlprogram.builtins.apply import Apply +from mlprogram.builtins.datatypes import Environment +from mlprogram.builtins.pick import Pick +from mlprogram.functools.functions import Sequence +from mlprogram.nn.function import Function Value = TypeVar("Value") diff --git a/mlprogram/metrics/test_case_result.py b/mlprogram/metrics/test_case_result.py index c0be2fd..340e85a 100644 --- a/mlprogram/metrics/test_case_result.py +++ b/mlprogram/metrics/test_case_result.py @@ -2,8 +2,8 @@ from torch import nn -from mlprogram.builtins import Environment -from mlprogram.languages import Interpreter +from mlprogram.builtins.datatypes import Environment +from mlprogram.languages.interpreter import Interpreter from mlprogram.metrics.accuracy import Accuracy from mlprogram.metrics.metric import use_environment diff --git a/mlprogram/pytorch_pfn_extras.py b/mlprogram/pytorch_pfn_extras.py index 83077fd..2dbdee3 100644 --- a/mlprogram/pytorch_pfn_extras.py +++ b/mlprogram/pytorch_pfn_extras.py @@ -6,7 +6,7 @@ from torch import nn from mlprogram import logging -from mlprogram.collections import TopKElement +from mlprogram.collections.top_k_element import TopKElement logger = logging.Logger(__name__)