Fixed bug with filtering/sampling in all2all variants and new2all alw… #15
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow identity and triggers; the jobs themselves are listed under `jobs:`.
name: GitHub Actions CI

on:
  # Run on pushes to the two long-lived branches, unless the push only
  # touches Markdown files.
  push:
    branches:
      - master
      - develop
    paths-ignore:
      - '**.md'
  # Also allow the workflow to be started manually.
  workflow_dispatch:

jobs:
########################################################################################
  # Builds kmer-db on every runner in the matrix and uploads the binary together
  # with the ./test/virus directory as one tarball per runner.
  make:
    name: Make
    strategy:
      # Keep building on the other runner even if one of them fails.
      fail-fast: false
      matrix:
        # NOTE(review): the artifact name below is derived from this label, and
        # the downstream job downloads it by the same name, so any change here
        # must be mirrored there. GitHub appears to have retired the macOS-12
        # hosted image - confirm and bump both jobs together.
        machine: [ubuntu-latest, macOS-12]
    runs-on: ['${{ matrix.machine }}']
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: make
        run: |
          # Log the version of the compiler that is actually passed to make
          # (CXX=g++-12); a bare `g++` may resolve to a different compiler.
          g++-12 --version
          make -j CXX=g++-12
      - name: tar artifacts
        # Bundle the executable and the test data into a single archive
        # (presumably so the executable bit survives the artifact round trip
        # - TODO confirm).
        run: tar -cvf kmer-db.tar ./kmer-db ./test/virus
      - uses: actions/upload-artifact@v4
        with:
          name: executable-artifact-${{ matrix.machine }}
          path: ./kmer-db.tar
########################################################################################
  # Regression run on the virus data set: downloads the tarball produced by the
  # `make` job for the same runner, executes kmer-db sub-commands and compares
  # every output byte-for-byte (cmp) with the reference file in ${INPUT_DIR}.
  # All steps share one working directory, so files written by an earlier step
  # (k18.parts.db, k18.csv, k18.db) are reused by later ones - keep the order.
  virus:
    name: Virus data
    needs: make
    strategy:
      fail-fast: false
      matrix:
        machine:
          - ubuntu-latest
          - macOS-12
    runs-on:
      - '${{ matrix.machine }}'
    env:
      INPUT_DIR: ./test/virus
    steps:
      # Fetch and unpack the executable and test data built for this runner.
      - uses: actions/download-artifact@v4
        with:
          name: executable-artifact-${{ matrix.machine }}
          path: ./
      - name: untar artifacts
        run: tar -xf kmer-db.tar

      # Database from the first part of the sample list; queried by the two
      # new2all steps and then extended in place by the `extend` step.
      - name: build
        run: ./kmer-db build ${INPUT_DIR}/seqs.part1.list k18.parts.db
      - name: new2all
        run: |
          ./kmer-db new2all k18.parts.db ${INPUT_DIR}/seqs.part2.list k18.n2a.csv
          cmp k18.n2a.csv ${INPUT_DIR}/k18.n2a.csv
      - name: new2all (sparse)
        run: |
          ./kmer-db new2all -sparse k18.parts.db ${INPUT_DIR}/seqs.part2.list k18.n2a.sparse.csv
          cmp k18.n2a.sparse.csv ${INPUT_DIR}/k18.n2a.sparse.csv
      - name: extend
        run: ./kmer-db build -extend -k 25 ${INPUT_DIR}/seqs.part2.list k18.parts.db

      # all2all on the extended database; k18.csv written here is the input of
      # the `distance` step.
      - name: all2all
        run: |
          ./kmer-db all2all k18.parts.db k18.csv
          cmp k18.csv ${INPUT_DIR}/k18.csv
      - name: all2all (sparse)
        run: |
          ./kmer-db all2all -sparse k18.parts.db k18.sparse.csv
          cmp k18.sparse.csv ${INPUT_DIR}/k18.sparse.csv
      - name: distance
        run: |
          ./kmer-db distance jaccard k18.csv k18.csv.jaccard
          ./kmer-db distance min k18.csv k18.csv.min
          ./kmer-db distance max k18.csv k18.csv.max
          ./kmer-db distance cosine k18.csv k18.csv.cosine
          ./kmer-db distance mash k18.csv k18.csv.mash
          cmp k18.csv.jaccard ${INPUT_DIR}/k18.csv.jaccard
          cmp k18.csv.min ${INPUT_DIR}/k18.csv.min
          cmp k18.csv.max ${INPUT_DIR}/k18.csv.max
          cmp k18.csv.cosine ${INPUT_DIR}/k18.csv.cosine
          cmp k18.csv.mash ${INPUT_DIR}/k18.csv.mash

      # The next four steps build the database in different ways and each
      # compares its all2all output with the same reference, k18.csv.
      # k18.db created in the first of them is reused by the final step.
      - name: build (default k) + all2all
        run: |
          ./kmer-db build ${INPUT_DIR}/seqs.list k18.db
          ./kmer-db all2all k18.db k18.csv
          cmp k18.csv ${INPUT_DIR}/k18.csv
      - name: build (default k, multifasta) + all2all
        run: |
          ./kmer-db build -multisample-fasta ${INPUT_DIR}/multi.list k18.multi.db
          ./kmer-db all2all k18.multi.db k18.multi.csv
          cmp k18.multi.csv ${INPUT_DIR}/k18.csv
      - name: build (default k, 2 x multifasta) + all2all
        run: |
          ./kmer-db build -multisample-fasta ${INPUT_DIR}/multi.split.list k18.multi.split.db
          ./kmer-db all2all k18.multi.split.db k18.multi.split.csv
          cmp k18.multi.split.csv ${INPUT_DIR}/k18.csv
      - name: build (default k) + extend + all2all
        run: |
          ./kmer-db build ${INPUT_DIR}/seqs.part1.list k18.parts.db
          ./kmer-db build -extend -k 25 ${INPUT_DIR}/seqs.part2.list k18.parts.db
          ./kmer-db all2all k18.parts.db k18.parts.csv
          cmp k18.parts.csv ${INPUT_DIR}/k18.csv

      # Both fraction-0.1 variants are compared with the same reference,
      # k18.frac.csv.
      - name: build (default k, fraction 0.1) + all2all
        run: |
          ./kmer-db build -f 0.1 ${INPUT_DIR}/seqs.list k18.frac.db
          ./kmer-db all2all k18.frac.db k18.frac.csv
          cmp k18.frac.csv ${INPUT_DIR}/k18.frac.csv
      - name: minhash (default k, fraction 0.1) + build + all2all
        run: |
          ./kmer-db minhash -f 0.1 ${INPUT_DIR}/seqs.list
          ./kmer-db build -from-minhash ${INPUT_DIR}/seqs.list k18.minhash.db
          ./kmer-db all2all k18.minhash.db k18.minhash.csv
          cmp k18.minhash.csv ${INPUT_DIR}/k18.frac.csv

      # Explicit k values.
      - name: build (k=24) + all2all
        run: |
          ./kmer-db build -k 24 ${INPUT_DIR}/seqs.list k24.db
          ./kmer-db all2all k24.db k24.csv
          cmp k24.csv ${INPUT_DIR}/k24.csv
      - name: build (k=25, f=0.1) + one2all
        run: |
          ./kmer-db build -k 25 -f 0.1 ${INPUT_DIR}/seqs.part1.list k25.db
          ./kmer-db one2all k25.db ${INPUT_DIR}/data/MT159713 MT159713.csv
          cmp MT159713.csv ${INPUT_DIR}/MT159713.csv

      # Queries k18.db (built in "build (default k) + all2all") with the same
      # list it was built from.
      - name: new2all (against itself)
        run: |
          ./kmer-db new2all k18.db ${INPUT_DIR}/seqs.list k18.n2a.itself.csv
          cmp k18.n2a.itself.csv ${INPUT_DIR}/k18.n2a.itself.csv