Skip to content

Commit

Permalink
Update to version 1.5.0
Browse files Browse the repository at this point in the history
see changelog for details
  • Loading branch information
AlexanderGress committed Apr 30, 2024
1 parent c7c0bb8 commit 22c2aae
Show file tree
Hide file tree
Showing 39 changed files with 1,713 additions and 1,971 deletions.
6 changes: 6 additions & 0 deletions changelog.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
Version 1.5.0
- Gene report function is now functional
- Reorganized features into classes, making adding new features in the future easier
This changes how features are stored and retrieved from the database, thus a new major version change was necessary.
- Improved the installer

Version 1.4.1
- Parallelized the MicroMiner lookup pipeline
- Continued implementation of the gene report function
Expand Down
40 changes: 23 additions & 17 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,23 @@ do
esac
done

verbose_stdout=3
verbose_stderr=4

if [ "$verbose" = true ]; then
verbose_stdout=/dev/stdout
verbose_stderr=/dev/stderr
eval "exec $verbose_stdout>&1"
eval "exec $verbose_stderr>&2"
else
verbose_stdout=/dev/null
verbose_stderr=/dev/null
eval "exec $verbose_stdout>/dev/null"
eval "exec $verbose_stderr>/dev/null"
fi

#Check if conda environment already exists, create it if not
env_list_result=$(conda env list | grep "$env_name")
if [ -z "$env_list_result" ]
then
echo "Conda environment with name $env_name not in current environment list, setting up new environment ..."
conda create -n "$env_name" python=$current_python_version -y >$verbose_stdout
conda create -n "$env_name" python=$current_python_version -y >&$verbose_stdout
else
echo "Conda environment with name $env_name already in environment list."
fi
Expand All @@ -68,13 +71,13 @@ new_env_path=$(conda env list | awk -v name="$env_name" '/^[^#]/{ if ($1 == name
source "$conda_bash_path"
conda activate "$env_name"
echo "$new_env_path"" activated"
} >$verbose_stdout
} >&$verbose_stdout

mamba_version_test_output=$(mamba --version 2>/dev/null)

if [ -z "$mamba_version_test_output" ]
then
conda install -y -c conda-forge mamba >$verbose_stdout 2>$verbose_stderr
conda install -y -c conda-forge mamba >&$verbose_stdout 2>&$verbose_stderr
fi

#install dependencies
Expand All @@ -94,15 +97,18 @@ fi
mamba install -y requests-toolbelt
echo "Installing package pymol ..."
mamba install -y -c conda-forge pymol-open-source
} >$verbose_stdout
} >&$verbose_stdout

#install the main package
echo "Installing StructMAn source code using pip ..."
pip install "$SCRIPTPATH" >$verbose_stdout
pip install "$SCRIPTPATH" >&$verbose_stdout

#install mmseqs2
echo "Installing MMseqs2 ..."
mamba install -y -c bioconda mmseqs2 >$verbose_stdout
mamba install -y -c bioconda mmseqs2 >&$verbose_stdout

#install wkhtmltopdf
mamba install -y -c conda-forge wkhtmltopdf >&$verbose_stdout

if [ -z $storage_folder ]
then
Expand Down Expand Up @@ -134,10 +140,10 @@ fi
cp "$path_to_dssp_binary" "$new_env_path"/bin/smssp

#Installing specific boost
mamba install -y libboost==1.82 >$verbose_stdout
mamba install -y libboost==1.82 >&$verbose_stdout

#Installing grpc
mamba install -y conda-forge::grpc-cpp==1.51.1 >$verbose_stdout
pip install grpcio >$verbose_stdout

#Init config file
structman_config_path="$new_env_path"/lib/python"$current_python_version"/site-packages/structman/structman_config.txt
Expand All @@ -148,7 +154,7 @@ echo " $structman_config_path"
structman config mmseqs_tmp_folder "$tmp_folder_path" -c "$structman_config_path"
structman config dssp_path smssp -c "$structman_config_path"
structman config mmseqs2_db_path "$storage_folder"/pdbba_search_db_mmseqs2
} >$verbose_stdout 2>$verbose_stderr
} >&$verbose_stdout 2>&$verbose_stderr

#install modeller
if ! [ -z "$modeller_key" ]
Expand All @@ -161,7 +167,7 @@ then
mamba install -y modeller
echo "Setting the given modeller key to the modeller config ..."
structman set_m_key "$modeller_key"
} >$verbose_stdout 2>$verbose_stderr
} >&$verbose_stdout 2>&$verbose_stderr
fi

echo "Setting StructMAn config and database ..."
Expand All @@ -177,7 +183,7 @@ then
structman config db_address "$database_server" -c "$structman_config_path"
echo "Setting up the database ..."
structman database create --compute_ppi
} >$verbose_stdout 2>$verbose_stderr
} >&$verbose_stdout 2>&$verbose_stderr
else
database_name=main_db
local_database_path="$local_database_folder"/local_structman_database.db
Expand All @@ -187,10 +193,10 @@ else
structman config db_name "$database_name" -c "$structman_config_path"
structman config db_address "-"
structman database create --compute_ppi
} >$verbose_stdout 2>$verbose_stderr
} >&$verbose_stdout 2>&$verbose_stderr
fi

structman update check_search_db >$verbose_stdout 2>$verbose_stderr
structman update check_search_db >&$verbose_stdout 2>&$verbose_stderr


echo "StructMAn successfully installed, please activate the right conda environment before using it:"
Expand Down
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"numpy>=1.22.3",
"psutil>=5.8.0",
"pymysql>=1.0.2",
"ray==2.9.1",
"ray==2.10.0",
"msgpack>=1.0.3",
"zstd>=1.5.2.5",
"pandas>=1.3.3",
Expand All @@ -55,7 +55,8 @@
"pycairo>=1.23.0",
"powerlaw>=1.5",
"biotite>=0.38.0",
"markdown>=2.6.9"
"markdown>=2.6.9",
"pdfkit==1.0.0"
],

package_data = {
Expand Down
2 changes: 1 addition & 1 deletion structman/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.4.1'
__version__ = '1.5.0'
65 changes: 65 additions & 0 deletions structman/base_utils/base_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@

from structman import settings

try:
from structguy.sampleSpace import CrossValidationSlice
except:
pass

import structman.lib.sdsc as sdsc

import zstd
Expand Down Expand Up @@ -330,6 +335,32 @@ def custom_encoder(obj):
serialized_snv.append(obj.__getattribute__(attribute_name))
return {'__snv__': True, 'as_list': serialized_snv}

if 'Microminer_features' in str(type(obj)): #isinstance just won't work, don't know why
serialized_object = []
for attribute_name in obj.__slots__:
serialized_object.append(obj.__getattribute__(attribute_name))
return {'__Microminer_features__': True, 'as_list': serialized_object}

if 'Structural_features' in str(type(obj)): #isinstance just won't work, don't know why
serialized_object = []
for attribute_name in obj.__slots__:
serialized_object.append(obj.__getattribute__(attribute_name))
return {'__Structural_features__': True, 'as_list': serialized_object}

if 'Integrated_features' in str(type(obj)): #isinstance just won't work, don't know why
serialized_object = []
for attribute_name in obj.__slots__:
serialized_object.append(obj.__getattribute__(attribute_name))
return {'__Integrated_features__': True, 'as_list': serialized_object}

if 'RIN_based_features' in str(type(obj)): #isinstance just won't work, don't know why
serialized_object = obj.get_raw_list()
return {'__RIN_based_features__': True, 'as_list': serialized_object}

if 'CrossValidationSlice' in str(type(obj)): #isinstance just won't work, don't know why
serialized_object = obj.get_raw_list()
return {'__CrossValidationSlice__': True, 'as_list': serialized_object}

return obj

def custom_decoder(obj):
Expand Down Expand Up @@ -449,6 +480,40 @@ def custom_decoder(obj):
for i, attribute_name in enumerate(snv_obj.__slots__):
snv_obj.__setattr__(attribute_name, serialized_snv[i])
return snv_obj

if '__Microminer_features__' in obj:
serialized_object = obj['as_list']
rebuild_obj = sdsc.mappings.Microminer_features()
for i, attribute_name in enumerate(rebuild_obj.__slots__):
rebuild_obj.__setattr__(attribute_name, serialized_object[i])
return rebuild_obj

if '__Structural_features__' in obj:
serialized_object = obj['as_list']
rebuild_obj = sdsc.mappings.Structural_features()
for i, attribute_name in enumerate(rebuild_obj.__slots__):
rebuild_obj.__setattr__(attribute_name, serialized_object[i])
return rebuild_obj

if '__Integrated_features__' in obj:
serialized_object = obj['as_list']
rebuild_obj = sdsc.mappings.Integrated_features()
for i, attribute_name in enumerate(rebuild_obj.__slots__):
rebuild_obj.__setattr__(attribute_name, serialized_object[i])
return rebuild_obj

if '__RIN_based_features__' in obj:
raw_rin_based_features = obj['as_list']
rebuild_obj = sdsc.mappings.RIN_based_features()
rebuild_obj.set_values(raw_rin_based_features)
return rebuild_obj

if '__CrossValidationSlice__' in obj:
serialized_object = obj['as_list']
rebuild_obj = CrossValidationSlice(raw_init = True)
for i, attribute_name in enumerate(rebuild_obj.__slots__):
rebuild_obj.__setattr__(attribute_name, serialized_object[i])
return rebuild_obj

return obj

Expand Down
28 changes: 18 additions & 10 deletions structman/lib/MMseqs2.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def apply_mmseqs(mmseqs_tmp_folder, mmseqs2_path, temp_fasta, search_db, gigs_of
temp_outfile = '%s/tmp_outfile_%s.fasta' % (mmseqs_tmp_folder, randomString())

if verbosity >= 2:
print(mmseqs2_path, 'easy-search', temp_fasta, search_db, temp_outfile, mmseqs_tmp_folder)
print(f'\nApply MMseqs2: {mmseqs2_path}, easy-search, {temp_fasta}, {search_db}, {temp_outfile}, {mmseqs_tmp_folder}\n')

out_format_str = 'query,target,fident,alnlen,tlen,qcov'

Expand Down Expand Up @@ -175,12 +175,26 @@ def apply_mmseqs(mmseqs_tmp_folder, mmseqs2_path, temp_fasta, search_db, gigs_of

if verbosity >= 3:
print(f'MMseqs2 results parsed: size of hit map: {len(hits)}')
if verbosity >= 4 and len(hits) < 50:
print(hits)

os.remove(temp_outfile)

return hits, pdb_ids, debug_store


def wipe_folder(config, folder_path):
    """Remove entries of ``folder_path`` modified after the program started.

    Best-effort cleanup: every entry whose modification time is later than
    ``config.prog_start_time`` is handed to ``shutil.rmtree``. Entries that
    cannot be removed (for example plain files, which ``shutil.rmtree``
    rejects) are left in place, and a warning is recorded only when
    ``config.verbosity`` is at least 4.

    Args:
        config: Object providing ``prog_start_time``, ``verbosity`` and an
            ``errorlog`` with an ``add_warning(message)`` method.
        folder_path: Directory whose direct entries are examined.

    Raises:
        OSError: If ``folder_path`` itself cannot be listed.
    """
    for fn in os.listdir(folder_path):
        # The '%s/%s' form is kept because this path is echoed verbatim in
        # the warning message below.
        subfolder_path = '%s/%s' % (folder_path, fn)

        try:
            modified_at = os.path.getmtime(subfolder_path)
        except OSError:
            # The entry cannot be stat'ed (e.g. it disappeared after the
            # listing was taken), so there is nothing left to wipe.
            continue

        # Only touch entries modified after this run started.
        if modified_at <= config.prog_start_time:
            continue

        try:
            shutil.rmtree(subfolder_path)
        except Exception:
            # Cleanup must never abort the pipeline, but unlike a bare
            # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
            if config.verbosity >= 4:
                config.errorlog.add_warning('Tmp folder wipe failed for: %s' % subfolder_path)


# called by serializePipeline
def search(proteins, config, custom_db=False):

Expand Down Expand Up @@ -235,6 +249,8 @@ def search(proteins, config, custom_db=False):

t0 = time.time()

wipe_folder(config, mmseqs_tmp_folder)

temp_fasta = '%s/tmp_%s.fasta' % (mmseqs_tmp_folder, randomString())
to_fasta_out = geneSeqMapToFasta(proteins, temp_fasta, config)

Expand Down Expand Up @@ -271,15 +287,7 @@ def search(proteins, config, custom_db=False):
hits[u_ac] = {(pdb_id, chain): [100.0, 1.0, [chain], len(proteins.get_sequence(u_ac)), len(proteins.get_sequence(u_ac))]}
pdb_ids.add(pdb_id)

for fn in os.listdir(mmseqs_tmp_folder):
subfolder_path = '%s/%s' % (mmseqs_tmp_folder, fn)
if os.path.exists(subfolder_path):
if os.path.getmtime(subfolder_path) > config.prog_start_time:
try:
shutil.rmtree(subfolder_path)
except:
if config.verbosity >= 4:
config.errorlog.add_warning('Tmp folder wipe failed for: %s' % subfolder_path)
wipe_folder(config, mmseqs_tmp_folder)

search_results.append((hits, pdb_ids, is_model_db))

Expand Down
Loading

0 comments on commit 22c2aae

Please sign in to comment.