Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
mana-ysh committed Apr 6, 2021
0 parents commit b5918e0
Show file tree
Hide file tree
Showing 77 changed files with 5,830 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# flake8 settings for this repository.
[flake8]
# Lines of up to 150 characters are accepted.
max-line-length = 150
# mccabe
# Complexity threshold used by the mccabe check.
max-complexity = 10
# No error codes are listed here; the only candidate (E203) is commented out below.
# NOTE(review): an empty `ignore` presumably replaces flake8's built-in default
# ignore list rather than extending it - confirm that this is intended.
ignore =
# E203 # whitespace before :
36 changes: 36 additions & 0 deletions .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# CI workflow: installs the project, then runs `make lint`, `make test` and
# `make typecheck` on every push and pull request targeting master.
name: Python package

on:
push:
branches: [ master ]
pull_request:
branches: [ master ]

jobs:
build:

runs-on: ubuntu-latest
strategy:
matrix:
# The job is run once for each Python version listed here.
# NOTE(review): versions are unquoted YAML numbers; quoting them is
# presumably safer once a version such as 3.10 is added - confirm.
python-version: [3.7, 3.8]

# Steps: check out the code, set up the matrix Python version, install
# dependencies, then lint, test and type-check through the Makefile targets.
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
with:
python-version: ${{ matrix.python-version }}
# poetry is installed with pip before `make install` is run.
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install poetry
make install
- name: Lint
run: |
make lint
- name: Test
run: |
make test
- name: Type checking
run: |
make typecheck
132 changes: 132 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# Ignore everything under data/kenlm/ except the .gitkeep placeholder,
# so the directory itself stays tracked while its contents do not.
data/kenlm/*
!data/kenlm/.gitkeep

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
68 changes: 68 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@

# Absolute path of the directory make runs in. $(CURDIR) is maintained by
# make itself, so no shell has to be spawned at parse time.
BASE_DIR := $(CURDIR)
# Prefix that runs a command inside the poetry-managed environment.
POETRY_RUN := poetry run

# Default output directory of `train`, stamped with today's date (YYYYMMDD).
TRAIN_ARTIFACTS_DIR := ${BASE_DIR}/log/train_$(shell date +'%Y%m%d')
# Default config file handed to each `expats` subcommand. Every one of these
# can be overridden on the command line, e.g. `make train TRAIN_CONFIG_PATH=...`.
TRAIN_CONFIG_PATH := ${BASE_DIR}/config/train.yaml
EVALUATION_CONFIG_PATH := ${BASE_DIR}/config/evaluation.yaml
PREDICT_CONFIG_PATH := ${BASE_DIR}/config/predict.yaml
# The name is misspelled (INTREPRET) but is kept as-is: it is part of the
# command-line interface used by the interpret targets.
INTREPRET_CONFIG_PATH := ${BASE_DIR}/config/interpret.yaml

# Values forwarded after `--overrides` to the expats commands (none by default).
OVERRIDES :=

# Log directory given to `tensorboard --logdir`. The value `...` is a
# placeholder; pass a real directory on the command line.
# The name is misspelled (TENSORBORD) but kept for compatibility.
TENSORBORD_LOG_DIR := ...

# Install the Python dependencies with poetry.
# Declared phony so a file or directory named `install` cannot mask the target.
.PHONY: install
install:
	poetry install

# Launch jupyter-notebook through `poetry run`.
# Declared phony so a file named `notebook` cannot mask the target.
.PHONY: notebook
notebook:
	${POETRY_RUN} jupyter-notebook

# Train a model: create TRAIN_ARTIFACTS_DIR if needed, then run `expats train`
# with TRAIN_CONFIG_PATH, the artifacts directory and any OVERRIDES.
# Declared phony so a file or directory named `train` cannot mask the target.
.PHONY: train
train:
	@echo training
	mkdir -p ${TRAIN_ARTIFACTS_DIR}
	${POETRY_RUN} expats train ${TRAIN_CONFIG_PATH} ${TRAIN_ARTIFACTS_DIR} --overrides ${OVERRIDES}

# Run `train` with IS_DEBUG=true in the environment, using the debug config
# and writing artifacts to log/debug.
# $(MAKE) is used instead of a literal `make` so that flags such as -n and -j
# are propagated to the sub-make.
.PHONY: train-debug
train-debug:
	IS_DEBUG=true $(MAKE) train TRAIN_CONFIG_PATH=config/train_debug.yaml TRAIN_ARTIFACTS_DIR=log/debug

# Run `expats evaluate` with EVALUATION_CONFIG_PATH and any OVERRIDES.
# Declared phony so a file or directory named `evaluate` cannot mask the target.
.PHONY: evaluate
evaluate:
	@echo evaluation on pre-trained model
	${POETRY_RUN} expats evaluate ${EVALUATION_CONFIG_PATH} --overrides ${OVERRIDES}

# Run `evaluate` with IS_DEBUG=true in the environment and the debug config.
# $(MAKE) is used instead of a literal `make` so that flags such as -n and -j
# are propagated to the sub-make.
.PHONY: evaluate-debug
evaluate-debug:
	IS_DEBUG=true $(MAKE) evaluate EVALUATION_CONFIG_PATH=config/evaluate_debug.yaml

# Run `expats predict` with PREDICT_CONFIG_PATH, PREDICT_OUTPUT_PATH and any
# OVERRIDES.
# NOTE(review): PREDICT_OUTPUT_PATH has no default among the variables defined
# at the top of this Makefile; pass it on the command line
# (`make predict PREDICT_OUTPUT_PATH=...`).
# Declared phony so a file or directory named `predict` cannot mask the target.
.PHONY: predict
predict:
	@echo prediction with pre-trained model
	${POETRY_RUN} expats predict ${PREDICT_CONFIG_PATH} ${PREDICT_OUTPUT_PATH} --overrides ${OVERRIDES}

# Run `predict` with IS_DEBUG=true in the environment, using the debug config
# and writing the output to log/debug_predict.
# $(MAKE) is used instead of a literal `make` so that flags such as -n and -j
# are propagated to the sub-make.
.PHONY: predict-debug
predict-debug:
	IS_DEBUG=true $(MAKE) predict PREDICT_CONFIG_PATH=config/predict_debug.yaml PREDICT_OUTPUT_PATH=log/debug_predict

# Run `expats interpret` with INTREPRET_CONFIG_PATH (variable name is spelled
# this way throughout the Makefile) and any OVERRIDES.
# Declared phony so a file or directory named `interpret` cannot mask the target.
.PHONY: interpret
interpret:
	@echo interpreting pre-trained model
	${POETRY_RUN} expats interpret ${INTREPRET_CONFIG_PATH} --overrides ${OVERRIDES}

# Run `interpret` with IS_DEBUG=true in the environment and the debug config.
# $(MAKE) is used instead of a literal `make` so that flags such as -n and -j
# are propagated to the sub-make.
.PHONY: interpret-debug
interpret-debug:
	IS_DEBUG=true $(MAKE) interpret INTREPRET_CONFIG_PATH=config/interpret_debug.yaml

# Train into a fresh timestamped directory under log/, evaluate the result,
# then delete that directory.
#   TRAIN_OVERRIDES       forwarded as OVERRIDES to the `train` sub-make
#   EVALUATION_OVERRIDES  appended after `artifact_path=...` for `evaluate`
# $(MAKE) is used instead of a literal `make` so that flags such as -n and -j
# are propagated to the sub-makes.
.PHONY: train-then-evaluate
train-then-evaluate:
	$(eval ARTIFACT_PATH := ${BASE_DIR}/log/$(shell date +'%Y%m%d%H%M%S'))
	$(MAKE) train TRAIN_CONFIG_PATH=${TRAIN_CONFIG_PATH} TRAIN_ARTIFACTS_DIR=${ARTIFACT_PATH} OVERRIDES='${TRAIN_OVERRIDES}'
	$(MAKE) evaluate EVALUATION_CONFIG_PATH=${EVALUATION_CONFIG_PATH} OVERRIDES='artifact_path=${ARTIFACT_PATH} ${EVALUATION_OVERRIDES}'
	rm -rf ${ARTIFACT_PATH}

# Start tensorboard on TENSORBORD_LOG_DIR (variable name is spelled this way
# where it is defined).
# Declared phony so a file or directory named `tensorboard` cannot mask the target.
.PHONY: tensorboard
tensorboard:
	${POETRY_RUN} tensorboard --logdir ${TENSORBORD_LOG_DIR}

# CI
# Run flake8 over ./expats and ./tests, printing the offending source lines
# and per-code statistics.
# Declared phony so a file or directory named `lint` cannot mask the target.
.PHONY: lint
lint:
	${POETRY_RUN} flake8 --show-source --statistics ./expats ./tests

# Run pytest on ./tests with coverage measured for ./expats.
# Declared phony so a file or directory named `test` cannot mask the target.
.PHONY: test
test:
	${POETRY_RUN} pytest -rf --cov=./expats ./tests

# Placeholder: no type checker is run; the target only prints a notice.
# Declared phony so a file or directory named `typecheck` cannot mask the target.
.PHONY: typecheck
typecheck:
	@echo currently not support to check types
86 changes: 86 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# EXPATS: A Toolkit for Explainable Automated Text Scoring

![EXPATS: A Toolkit for Explainable Automated Text Scoring](overview.png)

EXPATS is an open-source framework for automated text scoring (ATS) tasks, such as automated essay scoring and readability assessment. Users can develop and experiment with different ATS models quickly by using the toolkit's easy-to-use components, the configuration system, and the command-line interface. The toolkit also provides seamless integration with [the Language Interpretability Tool (LIT)](https://pair-code.github.io/lit/) so that one can interpret and visualize models and their predictions.

## Requirements

- [poetry](https://python-poetry.org/)

## Usage

1. Clone this repository.

```bash
$ git clone git@github.com:octanove/expats.git
$ cd expats
```

2. Install Python dependencies via poetry, and launch an interactive shell

```bash
$ poetry install
$ poetry shell
```

3. Prepare the dataset for your task

We'll use ASAP-AES, a standard dataset for automated essay scoring. You can download the dataset from [the Kaggle page](https://www.kaggle.com/c/asap-aes). EXPATS supports a dataset reader for ASAP-AES by default.

4. Write a config file

In the config file, you specify the type of the task (`task`), the type of the profiler (`profiler`) and its hyperparameters, and the dataset to use (`dataset`). An example config file for training a BERT-based regressor for ASAP-AES is shown below.

```bash
$ cat config/asap_aes/train_bert.yaml
task: regression

profiler:
type: TransformerRegressor
params:
trainer:
gpus: 1
max_epochs: 80
accumulate_grad_batches: 2
network:
output_normalized: true
pretrained_model_name_or_path: bert-base-uncased
lr: 4e-5
data_loader:
batch_size: 8
val_ratio: 0.2
max_length: null

dataset:
type: asap-aes
params:
path: data/asap-aes/training_set_rel3.tsv
```

5. Train your model

You can train the model by running the `expats train` command as shown below.

```bash
$ expats train config/asap_aes/train_bert.yaml artifacts
```

The result (e.g., log file, the model weights) is stored in the directory `artifacts`.

6. Evaluate your model

You can evaluate your model by running:

```bash
$ expats evaluate config/asap_aes/evaluate.yaml
```

You can also configure the evaluation settings by modifying the configuration file.

7. Interpret your model

You can launch the LIT server to interpret and visualize the trained model and its behavior:
```bash
$ expats interpret config/asap_aes/interpret.yaml
```
30 changes: 30 additions & 0 deletions config/asap_aes/evaluate.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@

# Evaluation config for ASAP-AES.
# Every value written as <fill> is a placeholder to replace before running.

# Set this to the artifact path that was given when training.
artifact_path: <fill>

dataset:
type: asap-aes
params:
# Path relative to the repository root (the directory `expats` is run from),
# rather than a path that exists only on one developer's machine.
# NOTE(review): the `_rest` suffix presumably denotes the split held out from
# the `.random11680` training file - confirm.
path: data/asap-aes/training_set_rel3.tsv.random11680_rest
prompt_id: <fill>

output_convert:
regression_to_classification:
type: MinMaxDenormalizedRoundNearestInteger
params:
x_min: <fill>
x_max: <fill>

# Metrics to report, grouped by task type; none of them takes parameters here.
metrics:
classification:
- type: MacroF1
params:
- type: MicroF1
params:
- type: Accuracy
params:
- type: QuadraticWeightedKappa
params:
regression:
- type: PearsonCorrelation
params:
22 changes: 22 additions & 0 deletions config/asap_aes/train_bert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Training config: a regression task on the asap-aes dataset, using a
# TransformerRegressor profiler initialised from bert-base-uncased.
task: regression

profiler:
type: TransformerRegressor
params:
trainer:
gpus: 1
max_epochs: 80
accumulate_grad_batches: 2
network:
output_normalized: true
pretrained_model_name_or_path: bert-base-uncased
lr: 4e-5
data_loader:
batch_size: 8
val_ratio: 0.2
max_length: null

dataset:
type: asap-aes
params:
# NOTE(review): the README example uses `training_set_rel3.tsv` without the
# `.random11680` suffix - confirm which file is intended.
path: data/asap-aes/training_set_rel3.tsv.random11680
Loading

0 comments on commit b5918e0

Please sign in to comment.