-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit b5918e0
Showing
77 changed files
with
5,830 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
[flake8] | ||
max-line-length = 150 | ||
# mccabe | ||
max-complexity = 10 | ||
ignore = | ||
# E203 # whitespace before : |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
name: Python package | ||
|
||
on: | ||
push: | ||
branches: [ master ] | ||
pull_request: | ||
branches: [ master ] | ||
|
||
jobs: | ||
build: | ||
|
||
runs-on: ubuntu-latest | ||
strategy: | ||
matrix: | ||
python-version: [3.7, 3.8] | ||
|
||
steps: | ||
- uses: actions/checkout@v2 | ||
- name: Set up Python ${{ matrix.python-version }} | ||
uses: actions/setup-python@v1 | ||
with: | ||
python-version: ${{ matrix.python-version }} | ||
- name: Install dependencies | ||
run: | | ||
python -m pip install --upgrade pip | ||
pip install poetry | ||
make install | ||
- name: Lint | ||
run: | | ||
make lint | ||
- name: Test | ||
run: | | ||
make test | ||
- name: Type checking | ||
run: | | ||
make typecheck |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
data/kenlm/* | ||
!data/kenlm/.gitkeep | ||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
pip-wheel-metadata/ | ||
share/python-wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.nox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
db.sqlite3-journal | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# IPython | ||
profile_default/ | ||
ipython_config.py | ||
|
||
# pyenv | ||
.python-version | ||
|
||
# pipenv | ||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | ||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | ||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | ||
# install all needed dependencies. | ||
#Pipfile.lock | ||
|
||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow | ||
__pypackages__/ | ||
|
||
# Celery stuff | ||
celerybeat-schedule | ||
celerybeat.pid | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
.dmypy.json | ||
dmypy.json | ||
|
||
# Pyre type checker | ||
.pyre/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
|
||
BASE_DIR := $(shell pwd) | ||
POETRY_RUN := poetry run | ||
|
||
TRAIN_ARTIFACTS_DIR := ${BASE_DIR}/log/train_$(shell date +'%Y%m%d') | ||
TRAIN_CONFIG_PATH := ${BASE_DIR}/config/train.yaml | ||
EVALUATION_CONFIG_PATH := ${BASE_DIR}/config/evaluation.yaml | ||
PREDICT_CONFIG_PATH := ${BASE_DIR}/config/predict.yaml | ||
INTREPRET_CONFIG_PATH := ${BASE_DIR}/config/interpret.yaml | ||
|
||
# Config overrides passed to the CLI via --overrides (default: no overrides) | ||
OVERRIDES := | ||
|
||
TENSORBORD_LOG_DIR := ... | ||
|
||
install: | ||
poetry install | ||
|
||
notebook: | ||
${POETRY_RUN} jupyter-notebook | ||
|
||
train: | ||
@echo training | ||
mkdir -p ${TRAIN_ARTIFACTS_DIR} | ||
${POETRY_RUN} expats train ${TRAIN_CONFIG_PATH} ${TRAIN_ARTIFACTS_DIR} --overrides ${OVERRIDES} | ||
|
||
train-debug: | ||
IS_DEBUG=true make train TRAIN_CONFIG_PATH=config/train_debug.yaml TRAIN_ARTIFACTS_DIR=log/debug | ||
|
||
evaluate: | ||
@echo evaluation on pre-trained model | ||
${POETRY_RUN} expats evaluate ${EVALUATION_CONFIG_PATH} --overrides ${OVERRIDES} | ||
|
||
evaluate-debug: | ||
IS_DEBUG=true make evaluate EVALUATION_CONFIG_PATH=config/evaluate_debug.yaml | ||
|
||
# Run `expats predict` with PREDICT_CONFIG_PATH and write to PREDICT_OUTPUT_PATH. | ||
# PREDICT_OUTPUT_PATH has no default in this Makefile, so pass it on the command line (see predict-debug). | ||
predict: | ||
@echo prediction with pre-trained model | ||
${POETRY_RUN} expats predict ${PREDICT_CONFIG_PATH} ${PREDICT_OUTPUT_PATH} --overrides ${OVERRIDES} | ||
|
||
predict-debug: | ||
IS_DEBUG=true make predict PREDICT_CONFIG_PATH=config/predict_debug.yaml PREDICT_OUTPUT_PATH=log/debug_predict | ||
|
||
interpret: | ||
@echo interpreting pre-trained model | ||
${POETRY_RUN} expats interpret ${INTREPRET_CONFIG_PATH} --overrides ${OVERRIDES} | ||
|
||
interpret-debug: | ||
IS_DEBUG=true make interpret INTREPRET_CONFIG_PATH=config/interpret_debug.yaml | ||
|
||
train-then-evaluate: | ||
$(eval ARTIFACT_PATH := ${BASE_DIR}/log/$(shell date +'%Y%m%d%H%M%S')) | ||
make train TRAIN_CONFIG_PATH=${TRAIN_CONFIG_PATH} TRAIN_ARTIFACTS_DIR=${ARTIFACT_PATH} OVERRIDES='${TRAIN_OVERRIDES}' | ||
make evaluate EVALUATION_CONFIG_PATH=${EVALUATION_CONFIG_PATH} OVERRIDES='artifact_path=${ARTIFACT_PATH} ${EVALUATION_OVERRIDES}' | ||
rm -rf ${ARTIFACT_PATH} | ||
|
||
tensorboard: | ||
${POETRY_RUN} tensorboard --logdir ${TENSORBORD_LOG_DIR} | ||
|
||
# CI | ||
lint: | ||
${POETRY_RUN} flake8 --show-source --statistics ./expats ./tests | ||
|
||
test: | ||
${POETRY_RUN} pytest -rf --cov=./expats ./tests | ||
|
||
typecheck: | ||
@echo currently not support to check types |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
# EXPATS: A Toolkit for Explainable Automated Text Scoring | ||
|
||
![EXPATS: A Toolkit for Explainable Automated Text Scoring](overview.png) | ||
|
||
EXPATS is an open-source framework for automated text scoring (ATS) tasks, such as automated essay scoring and readability assessment. Users can develop and experiment with different ATS models quickly by using the toolkit's easy-to-use components, the configuration system, and the command-line interface. The toolkit also provides seamless integration with [the Language Interpretability Tool (LIT)](https://pair-code.github.io/lit/) so that one can interpret and visualize models and their predictions. | ||
|
||
## Requirements | ||
|
||
- [poetry](https://python-poetry.org/) | ||
|
||
## Usage | ||
|
||
1. Clone this repository. | ||
|
||
```bash | ||
$ git clone git@github.com:octanove/expats.git | ||
$ cd expats | ||
``` | ||
|
||
2. Install Python dependencies via poetry, and launch an interactive shell | ||
|
||
```bash | ||
$ poetry install | ||
$ poetry shell | ||
``` | ||
|
||
3. Prepare the dataset for your task | ||
|
||
We'll use ASAP-AES, a standard dataset for automated essay scoring. You can download the dataset from [the Kaggle page](https://www.kaggle.com/c/asap-aes). EXPATS supports a dataset reader for ASAP-AES by default. | ||
|
||
4. Write a config file | ||
|
||
In the config file, you specify the type of the task (`task`), the type of the profiler (`profiler`) and its hyperparameters, and the dataset to use (`dataset`). An example config file for training a BERT-based regressor for ASAP-AES is shown below. | ||
|
||
```bash | ||
$ cat config/asap_aes/train_bert.yaml | ||
task: regression | ||
|
||
profiler: | ||
type: TransformerRegressor | ||
params: | ||
trainer: | ||
gpus: 1 | ||
max_epochs: 80 | ||
accumulate_grad_batches: 2 | ||
network: | ||
output_normalized: true | ||
pretrained_model_name_or_path: bert-base-uncased | ||
lr: 4e-5 | ||
data_loader: | ||
batch_size: 8 | ||
val_ratio: 0.2 | ||
max_length: null | ||
|
||
dataset: | ||
type: asap-aes | ||
params: | ||
path: data/asap-aes/training_set_rel3.tsv | ||
``` | ||
|
||
5. Train your model | ||
|
||
You can train the model by running the `expats train` command as shown below. | ||
|
||
```bash | ||
$ expats train config/asap_aes/train_bert.yaml artifacts | ||
``` | ||
|
||
The result (e.g., log file, the model weights) is stored in the directory `artifacts`. | ||
|
||
6. Evaluate your model | ||
|
||
You can evaluate your model by running: | ||
|
||
```bash | ||
$ expats evaluate config/asap_aes/evaluate.yaml | ||
``` | ||
|
||
You can also configure the evaluation settings by modifying the configuration file. | ||
|
||
7. Interpret your model | ||
|
||
You can launch the LIT server to interpret and visualize the trained model and its behavior: | ||
```bash | ||
$ expats interpret config/asap_aes/interpret.yaml | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
|
||
# Please set your artifact path specified when training | ||
artifact_path: <fill> | ||
|
||
# Dataset to evaluate on. The path is repository-relative, mirroring the training config, | ||
# rather than a developer-specific absolute path; adjust it to where your data split lives. | ||
dataset: | ||
type: asap-aes | ||
params: | ||
path: data/asap-aes/training_set_rel3.tsv.random11680_rest | ||
prompt_id: <fill> | ||
|
||
output_convert: | ||
regression_to_classification: | ||
type: MinMaxDenormalizedRoundNearestInteger | ||
params: | ||
x_min: <fill> | ||
x_max: <fill> | ||
|
||
metrics: | ||
classification: | ||
- type: MacroF1 | ||
params: | ||
- type: MicroF1 | ||
params: | ||
- type: Accuracy | ||
params: | ||
- type: QuadraticWeightedKappa | ||
params: | ||
regression: | ||
- type: PearsonCorrelation | ||
params: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
task: regression | ||
|
||
profiler: | ||
type: TransformerRegressor | ||
params: | ||
trainer: | ||
gpus: 1 | ||
max_epochs: 80 | ||
accumulate_grad_batches: 2 | ||
network: | ||
output_normalized: true | ||
pretrained_model_name_or_path: bert-base-uncased | ||
lr: 4e-5 | ||
data_loader: | ||
batch_size: 8 | ||
val_ratio: 0.2 | ||
max_length: null | ||
|
||
dataset: | ||
type: asap-aes | ||
params: | ||
path: data/asap-aes/training_set_rel3.tsv.random11680 |
Oops, something went wrong.