Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V1.1.0.20240131 #35

Merged
merged 12 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/heatcluster.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: install dependencies
run: pip3 install pandas numpy pathlib seaborn matplotlib scipy
run: pip3 install pandas numpy pathlib seaborn seaborn_polars polars pyarrow matplotlib scipy

- name: test (tab-delimited)
run: python3 heatcluster.py -i test/small_matrix.csv
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM ubuntu:jammy as app

# default version
ARG HEATCLUSTER_VER="1.0.2c"
ARG HEATCLUSTER_VER="1.1.0.20240131"

# adding labels
LABEL base.image="ubuntu:jammy"
Expand All @@ -27,7 +27,7 @@ RUN apt-get update && apt-get upgrade -y && \
apt-get autoclean && rm -rf /var/lib/apt/lists/*

# installing python dependencies
RUN pip3 install --no-cache argparse pandas numpy pathlib seaborn matplotlib scipy --upgrade-strategy=only-if-needed
RUN pip3 install --no-cache argparse pandas numpy pathlib seaborn seaborn_polars polars pyarrow matplotlib scipy --upgrade-strategy=only-if-needed

# copying files to docker image
COPY . /heatcluster
Expand Down
21 changes: 13 additions & 8 deletions heatcluster.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
#!/usr/bin/python3

###########################################
# heatcluster-1.0.2c #
# heatcluster-1.1.0.20240131 #
# written by Stephen Beckstrom-Sternberg #
# Creates SNP heatmaps #
# from SNP matrices #
# Outputs sorted csv SNP matrix #
# Uses Polars instead of Pandas #
###########################################

import argparse
Expand All @@ -16,6 +17,10 @@
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import polars as pl
import seaborn_polars as snl
import pyarrow


logging.basicConfig(format='%(asctime)s - %(message)s', datefmt='%y-%b-%d %H:%M:%S', level=logging.INFO)

Expand Down Expand Up @@ -64,18 +69,17 @@ def read_snp_matrix(file):
file (str): SNP dist output file that should be converted to pandas dataframe

Returns:
df (DataFrame): Pandas dataframe of SNP matrix.
df (DataFrame): Polars dataframe of SNP matrix.
"""
logging.debug('Determining if file is comma or tab delimited')
tabs = pd.read_csv(file, nrows=1, sep='\t').shape[1]
commas = pd.read_csv(file, nrows=1, sep=',').shape[1]
tabs = pl.scan_csv(file, n_rows=1, separator='\t').shape[1]
commas = pl.scan_csv(file, n_rows=1, separator=',').shape[1]
if tabs > commas:
logging.debug('The file is tab-delimited')
df = pd.read_csv(file, sep='\t', index_col=False)
df = pl.scan_csv(file, separator='\t', index_col=False)
else:
logging.debug('The file is comma-delimited')
df = pd.read_csv(file, sep=',', index_col=False)

df = pl.scan_csv(file, separator=',', index_col=False)
return df

def clean_and_read_df(df):
Expand Down Expand Up @@ -182,7 +186,8 @@ def create_heatmap(df, fontSize, labelSize, figsize, labels):
fig,ax = plt.subplots(figsize=figsize)
logging.debug('Creating heatmap')

heatmap = sns.heatmap(
#heatmap = sns.heatmap(
heatmap = snl.heatmap(
df,
xticklabels=True,
yticklabels=True,
Expand Down
Loading