Skip to content

Commit

Permalink
Finalizing refactoring of 'replicate results'
Browse files Browse the repository at this point in the history
  • Loading branch information
recursion-ninja committed Mar 1, 2022
1 parent 475514e commit d7b14fa
Show file tree
Hide file tree
Showing 12 changed files with 133 additions and 198 deletions.
47 changes: 0 additions & 47 deletions README.md

This file was deleted.

1 change: 1 addition & 0 deletions README.md
1 change: 1 addition & 0 deletions app/generate-timings/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,7 @@ timeFilePoint taxaNumPadder strLenPadder counter tcmPath fp = do
, "--output"
, "/dev/null"
]
putStrLn commandStr

p <- makeCleanProcess counter [] commandStr

Expand Down
29 changes: 0 additions & 29 deletions bin/measure-scaling-performance.sh

This file was deleted.

File renamed without changes.
File renamed without changes.
7 changes: 7 additions & 0 deletions ChangeLog.md → doc/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Revision history for commutative-multi-string-alignment

## 1.1.1 -- 2022-03-01

* Refactoring replicate results script
* Dropping `stack` build support



## 1.1.0 -- 2022-02-23

* Added support for GHC `9.0.*` and `9.2.*`
Expand Down
File renamed without changes.
40 changes: 40 additions & 0 deletions doc/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
Efficient Implied Alignment
=============================

[![DOI:10.1186/s12859-020-03595-2](https://zenodo.org/badge/DOI/10.1186/s12859-020-03595-2.svg)](https://doi.org/10.1186/s12859-020-03595-2)

This repository hosts the program `implied-align` implementing the algorithm described in the paper *Efficient Implied Alignment* and a script to replicate the results of the paper.

### Installation of `implied-align`

You can build and install `implied-align` from source using the Haskell toolchain installer `ghcup` via the supplied `makefile` from the source directory of this project:

```
$ make install
```

After the `make install` command has completed, the `implied-align` binary will be placed in this project's `bin` directory.

### Running `implied-align`

The `implied-align` program takes a number of command line arguments to specify inputs and outputs. For more information run the following command:

```
$ implied-align --help
```

### Replicating results of the paper

For convenience in replicating the results of the paper, a "replicate-results" script has been provided. All that is needed to replicate the paper's results is to run the following command:

```
$ make replicate
```

This will create a `replicate-results` directory, with subdirectories `csv`, `data`, `img`, `taxa`, and `tree`.

- The `taxa` directory holds temporary files used in pruning the data-sets.
- The `data` & `tree` directories hold the pruned input files for the data-sets.
- The `csv` directory holds the timing information measured during the replication of the results.
- The `img` directory holds the generated images from the results.

6 changes: 3 additions & 3 deletions efficient-implied-alignment.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ category: Algorithms
build-type: Simple

name: efficient-implied-alignment
version: 1.1.0
version: 1.1.1

author: Alex Washburn
maintainer: [email protected]
Expand Down Expand Up @@ -31,8 +31,8 @@ tested-with:
GHC == 9.2.1

extra-source-files:
ChangeLog.md
README.md
doc/CHANGELOG.md
doc/README.md

source-repository head
type: git
Expand Down
128 changes: 80 additions & 48 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,64 +6,96 @@
#
# # # # #

bin-dir := ./bin
data-dir := replicate-results
executable := $(bin-dir)/generate-timings
generate := $(bin-dir)/measure-scaling-performance.sh using
biological-scale := [1%1,1%2,1%4,1%8,1%16,1%32]
customized-scale := [1%1,1%2,1%4,1%8,1%16,1%32,1%64]
customized-nodes := [4,8,16,32,64,128,256]
# Locations of the installed executables and of the results workspace.
dir-bin         := ./bin
dir-data        := replicate-results

# The two executables this project installs, by name and by installed path.
bin-aln         := implied-align
bin-aln-path    := $(dir-bin)/$(bin-aln)
bin-gen         := generate-timings
bin-gen-path    := $(dir-bin)/$(bin-gen)

# NOTE(review): no rule visible here expands the next two variables; they are
# kept unchanged in case something outside this view still does -- confirm
# before removing them.
measure-script  := measure-scaling-performance.sh
measure-dataset := $(dir-bin)/$(measure-script) using

# Everything a data-set target needs before it can run. Both executables are
# listed: previously $(bin-aln-path) appeared twice, so nothing required
# $(bin-gen-path) to have been built.
prerequisites   := $(bin-aln-path) $(bin-gen-path) ensure-python ensure-workspace


# All synonyms for replicating the paper's results.
# None of these names is a file, so they are declared phony: a stray file
# called `all`, `replicate`, `reproduce` or `results` must never make the goal
# look up to date.
.PHONY: all replicate reproduce results

# `all` stays the first target of this group.
all replicate reproduce: results

# The complete set of data sets measured for the paper.
results: fungi metazoa pathological


# Install dependencies required to replicate results.
#
# ensure-haskell: when `ghcup` cannot be found on PATH, download its bootstrap
# script (HTTPS only, TLS 1.2 or newer) and pipe it into `sh`.
ensure-haskell:
	@if ! command -v ghcup >/dev/null 2>&1; then \
	  curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | sh; \
	fi

# Install dependencies required to replicate results:
ensure-Haskell: $(bin-dir)/generate-timings
@command -v ghcup &> /dev/null || curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | sh
# Python tooling: the interpreter, pip, and the matplotlib package.
# Each one is installed only when its probe command fails.
ensure-python:
	@if ! command -v python3 >/dev/null 2>&1; then apt-get install python3 --yes; fi
	@if ! command -v pip3 >/dev/null 2>&1; then apt-get install python3-pip --yes; fi
	@if ! pip3 show matplotlib >/dev/null 2>&1; then pip3 install --upgrade matplotlib; fi

ensure-Python:
@command -v python3 &> /dev/null || apt-get install python3 --yes
@command -v pip3 &> /dev/null || apt-get install python3-pip --yes
@pip3 show matplotlib &> /dev/null || pip3 install --upgrade matplotlib
# Create the results workspace: $(dir-data) with its csv, data, img, taxa and
# tree sub-directories. `mkdir -p` does nothing for directories that exist.
# The directory list is built with $(addprefix) instead of $(eval)-ing a helper
# variable from inside the recipe, which defined a global variable as a side
# effect of running this rule.
ensure-workspace:
	@mkdir -p $(addprefix $(dir-data)/,csv data img taxa tree)

ensure-R:
command -v &> /dev/null || sudo apt install r-base

compile-binaries: ensure-Haskell
@echo "Copiling binaries"
@ghcup run \
--ghc 9.2.1 \
--cabal 3.6.2.0 \
-- cabal update && \
cabal install \
--installdir=$(bin-dir) \
--install-method=copy
# Build binaries
#
# haskell-sources: every Haskell source file below app/ and src/, at any depth.
#   $(wildcard) has no recursive `**` glob, so the previous patterns only saw
#   files exactly one directory below app/ and src/; `find` sees all of them.
# ghcup-run: prefix that runs one command with the pinned GHC and cabal.
# require: prerequisites of the installed executables. `ensure-haskell` is
#   order-only (after the `|`): it names no file, so as a normal prerequisite
#   it forced a reinstall on every invocation of make.
# install: refreshes the package index, then installs both executables into
#   $(dir-bin). Each cabal call carries its own $(ghcup-run) prefix because the
#   shell splits the command at `&&`; without it the second cabal ran outside
#   the pinned toolchain. `--overwrite-policy=always` lets a rebuild replace
#   executables that are already present in $(dir-bin).
haskell-sources := $(shell find app src -type f -name '*.hs' 2>/dev/null)
ghcup-run       := ghcup run --ghc 9.2.1 --cabal 3.6.2.0 --
require         := $(haskell-sources) | ensure-haskell
install         := $(ghcup-run) cabal update && \
                   $(ghcup-run) cabal install $(bin-gen) $(bin-aln) \
                     --installdir=$(dir-bin) --install-method=copy \
                     --overwrite-policy=always

ensure-workspace:
@mkdir -p $(data-dir)/csv
@mkdir -p $(data-dir)/data
@mkdir -p $(data-dir)/img
@mkdir -p $(data-dir)/taxa
@mkdir -p $(data-dir)/tree

fungi: compile-binaries ensure-Python ensure-R ensure-workspace
$(generate) \
'fungi' '11' '[25,50,100,200,400,800,1553]' $(biological-scale)

metazoa: compile-binaries ensure-Python ensure-R ensure-workspace
$(generate) \
'metazoa' '11' '[25,50,100,200,400,800,1766]' $(biological-scale)

pathological: compile-binaries ensure-Python ensure-R ensure-workspace
$(generate) \
'pathological' '12' $(customized-nodes) $(customized-scale)
$(generate) \
'pathological' '31' $(customized-nodes) $(customized-scale) '--no-generate'
# Both installed executables have the same prerequisites and the same recipe.
# A rule header naming several targets is shorthand for writing the rule once
# per target, so this is equivalent to two separate, identical rules.
$(bin-aln-path) $(bin-gen-path): $(require)
	@$(install)


# Generate timing data of data sets
#
# Parameter lists handed to the `measure` rule by the data-set targets:
#   biological-scale -- the `scale` value for fungi and metazoa
#   customized-scale -- the `scale` value for pathological
#   customized-nodes -- the `sizes` value for pathological
# The single quotes and backslashes are part of each value. Presumably they
# keep the brackets literal across both shell invocations (the recursive make
# call, then the measure recipe) -- confirm before simplifying the quoting.
biological-scale := '\[1%1,1%2,1%4,1%8,1%16,1%32\]'
customized-scale := '\[1%1,1%2,1%4,1%8,1%16,1%32,1%64\]'
customized-nodes := '\[4,8,16,32,64,128,256\]'

# Each data-set target re-invokes make on the `measure` goal, passing the
# data-set name, the cost-matrix identifier, the list of leaf counts (`sizes`)
# and the list of length scales (`scale`) as command-line variables.

# fungi: cost matrix 11, leaf counts 25 through 1553.
fungi: $(prerequisites)
@$(MAKE) --no-print-directory measure \
name='fungi' cost='11' sizes='\[25,50,100,200,400,800,1553\]' scale=$(biological-scale)

# metazoa: cost matrix 11, leaf counts 25 through 1766.
metazoa: $(prerequisites)
@$(MAKE) --no-print-directory measure \
name='metazoa' cost='11' sizes='\[25,50,100,200,400,800,1766\]' scale=$(biological-scale)

# pathological: measured twice, with cost matrix 12 and then with cost
# matrix 31. The second run additionally passes flags='--no-generate'.
pathological: $(prerequisites)
@$(MAKE) --no-print-directory measure \
name='pathological' cost='12' sizes=$(customized-nodes) scale=$(customized-scale)

@$(MAKE) --no-print-directory measure \
name='pathological' cost='31' sizes=$(customized-nodes) scale=$(customized-scale) flags='--no-generate'

# Time one data set, then plot its pre-order and post-order results.
#
# Meant to be reached through a recursive make call that supplies:
#   name  -- data-set base name; selects data-sets/<name>.afasta and .tree
#   cost  -- selects data-sets/tcm-<cost>.tcm and is part of the output name
#   sizes -- passed through to --leaves
#   scale -- passed through to --lengths
#   flags -- optional; extra arguments placed before the other options
#
# The first recipe line stops with a clear message when a required variable is
# missing, instead of launching the tools with half-empty paths. Executable and
# workspace locations come from $(bin-gen-path), $(dir-bin) and $(dir-data)
# rather than repeating their literal values.
measure:
	@: $(foreach v,name cost sizes scale,$(if $(strip $($(v))),,$(error measure: variable '$(v)' is not set)))
	$(bin-gen-path) $(flags) \
	  --data    data-sets/$(name).afasta \
	  --tree    data-sets/$(name).tree \
	  --tcm     data-sets/tcm-$(cost).tcm \
	  --output  $(name)-$(cost) \
	  --leaves  $(sizes) \
	  --lengths $(scale)

	python3 $(dir-bin)/plot-figure.py \
	  "$(dir-data)/csv/$(name)-$(cost).preorder.csv" \
	  "$(dir-data)/img/$(name)-preorder.eps"
	python3 $(dir-bin)/plot-figure.py \
	  "$(dir-data)/csv/$(name)-$(cost).postorder.csv" \
	  "$(dir-data)/img/$(name)-postorder.eps"

# Clean up after replicating results
#
# Removes the results workspace, the dist-newstyle build directory and both
# installed executables. Declared phony so that a file named `clean` can never
# make the rule look up to date. The workspace is named through $(dir-data) so
# the rule follows that variable if it is ever changed.
.PHONY: clean
clean:
	@$(RM) -r $(dir-data)
	@$(RM) -r dist-newstyle
	@$(RM) $(bin-aln-path)
	@$(RM) $(bin-gen-path)
58 changes: 1 addition & 57 deletions replicate-results.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/bin/bash


# # # # #
#
# Script to reproduce the results of the paper from a clean environment.
Expand All @@ -9,59 +8,4 @@
#
# # # # #


# Install dependencies required to replicate results:
# * Load Python3 dependencies
which python3 &> /dev/null || apt-get install python3 --yes
which pip3 &> /dev/null || apt-get install python3-pip --yes
pip3 show matplotlib &> /dev/null || pip3 install --upgrade matplotlib

# * Load R dependencies
which R &> /dev/null || sudo apt install r-base

# * Load Haskell dependencies
which ghcup &> /dev/null || curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | sh


# Build binaries
ghcup run \
--ghc 9.2.1 \
--cabal 3.6.2.0 \
-- cabal update && \
cabal install \
--installdir=./bin \
--install-method=copy


# Ensure output directories exist to place results data
mkdir -p replicate-results/csv
mkdir -p replicate-results/data
mkdir -p replicate-results/img
mkdir -p replicate-results/taxa
mkdir -p replicate-results/tree


# Run data sets to produce the results data
function generate()
{
./bin/generate-timings $5 \
--data "data-sets/$1.afasta" \
--tree "data-sets/$1.tree" \
--tcm "data-sets/tcm-${2}.tcm" \
--output "$1-$2" \
-n "$3" \
-k "$4"

python3 plot-figure.py \
"replicate-results/csv/${1}-${2}.preorder.csv" \
"replicate-results/img/${1}-preorder.eps"

python3 plot-figure.py \
"replicate-results/csv/${1}-${2}.postorder.csv" \
"replicate-results/img/${1}-postorder.eps"
}

generate 'fungi' '11' '[25,50,100,200,400,800,1553]' '[1%1,1%2,1%4,1%8,1%16,1%32]'
generate 'metazoa' '11' '[25,50,100,200,400,800,1766]' '[1%1,1%2,1%4,1%8,1%16,1%32]'
generate 'pathological' '12' '[4,8,16,32,64,128,256]' '[1%1,1%2,1%4,1%8,1%16,1%32,1%64]'
generate 'pathological' '31' '[4,8,16,32,64,128,256]' '[1%1,1%2,1%4,1%8,1%16,1%32,1%64]' '--no-generate'
make all
14 changes: 0 additions & 14 deletions stack.yaml

This file was deleted.

0 comments on commit d7b14fa

Please sign in to comment.