Skip to content

Commit

Permalink
v0.8.5
Browse files Browse the repository at this point in the history
  • Loading branch information
Pombert-JF committed Sep 23, 2021
1 parent fbec8ce commit f0b44bf
Show file tree
Hide file tree
Showing 31 changed files with 1,203 additions and 808 deletions.
159 changes: 93 additions & 66 deletions Homology_search/PDB_headers.pl
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
#!/usr/bin/perl
## Pombert Lab 2020
my $version = '0.3c';
my $version = '0.4';
my $name = 'PDB_headers.pl';
my $updated = '2021-07-25';
my $updated = '2021-09-21';

use strict; use warnings; use Getopt::Long qw(GetOptions); use File::Basename;
use File::Find; use PerlIO::gzip;
use strict;
use warnings;
use Getopt::Long qw(GetOptions);
use File::Basename;
use File::Find;
use PerlIO::gzip;

## Usage definition
my $USAGE = <<"OPTIONS";
Expand All @@ -32,11 +36,11 @@

## Defining options
my $pdb;
my $out;
my $rcsb_list;
my $verbose = 1000;
GetOptions(
'p|pdb=s' => \$pdb,
'o|output=s' => \$out,
'o|output=s' => \$rcsb_list,
'v|verbose=i' => \$verbose
);

Expand All @@ -49,8 +53,25 @@
$dir
);

## Doing a first pass to see which files have been parsed previously
## Should reduce overall computation time by skipping parsing
my %previous_data;
my $diamond = '>';
if (-f $rcsb_list){
$diamond = '>>';
open LIST, "<", "$rcsb_list" or die "Can't read $rcsb_list: $!\n";
while (my $line = <LIST>){
chomp $line;
if ($line =~ /^(\w+)/){
my $rcsb_entry = $1;
$previous_data{$rcsb_entry} = 1;
}
}
close LIST;
}

## Parsing PDB files (*.ent.gz)
open OUT, ">", "$out" or die "Can't create file $out: $!\n";
open OUT, "$diamond", "$rcsb_list" or die "Can't create file $rcsb_list: $!\n";

my $pdb_count = 0;
my $start = time;
Expand All @@ -60,75 +81,81 @@

$pdb_count++;

open PDB, "<:gzip", "$pb" or die "Can't open file $pb: $!\n";
## Grabbing RCSB PDB entry name from pdb file
my ($pdb, $folder) = fileparse($pb);
$pdb =~ s/^pdb//;
$pdb =~ s/.ent.gz$//;
my $title = undef;
my %molecules;
my $mol_id = undef;

## verbosity; lots of files to parse...
my $modulo = ($pdb_count % $verbose);
my $current_count = commify($pdb_count);
if ($modulo == 0){ print "Working on PDB file # $current_count: $pb\n"; }

while (my $line = <PDB>){
chomp $line;
## Getting title info from TITLE entries
if ($line =~ /^TITLE\s{5}(.*)$/){
my $key = $1;
$key =~ s/\s+$/ /; ## Discard trailing space characters
$title .= $key;
}
elsif ($line =~ /^TITLE\s+\d+\s(.*)$/){
my $key = $1;
$key =~ s/\s+$/ /; ## Discard trailing space characters
$title .= $key;
}
## Getting chain information from COMPND entries
elsif ($line =~ /^COMPND\s+(\d+)?\s?MOL_ID:\s(\d+)/){
$mol_id = $2;
}
elsif ($line =~ /^COMPND\s+(\d+)?(.*)$/){
my $data = $2;
$data =~ s/\s+$//;
$molecules{$mol_id} .= $data;
}
}
binmode PDB, ":gzip(none)";

## Printing title
print OUT "$pdb\tTITLE\t$title\n";

## Printing chain(s)
foreach my $id (sort (keys %molecules)){

my $molecule;
my $chains;

if ($molecules{$id} =~ /MOLECULE: (.*?);/){
$molecule = $1;
}
elsif($molecules{$id} =~ /MOLECULE: (.*?)/){
$molecule = $1;
## If at the end of the COMPND section, there is no semicolon after the molecule(s)
## Working on the PDB file only if it has not been seen before
if (exists $previous_data{$pdb}) { next; }
else {
open PDB, "<:gzip", "$pb" or die "Can't open file $pb: $!\n";
my $title = undef;
my %molecules;
my $mol_id = undef;

while (my $line = <PDB>){
chomp $line;
## Getting title info from TITLE entries
if ($line =~ /^TITLE\s{5}(.*)$/){
my $key = $1;
$key =~ s/\s+$/ /; ## Discard trailing space characters
$title .= $key;
}
elsif ($line =~ /^TITLE\s+\d+\s(.*)$/){
my $key = $1;
$key =~ s/\s+$/ /; ## Discard trailing space characters
$title .= $key;
}
## Getting chain information from COMPND entries
elsif ($line =~ /^COMPND\s+(\d+)?\s?MOL_ID:\s(\d+)/){
$mol_id = $2;
}
elsif ($line =~ /^COMPND\s+(\d+)?(.*)$/){
my $data = $2;
$data =~ s/\s+$//;
$molecules{$mol_id} .= $data;
}
}

if ($molecules{$id} =~ /CHAIN: (.*?);/){
$chains = $1;
}
elsif ($molecules{$id} =~ /CHAIN: (.*?)/){
$chains = $1;
## If at the end of the COMPND section, there is no semicolon after the chain(s)
}

$chains =~ s/ //g;
my @chains = split (",", $chains);
foreach my $chain (@chains){
if ($molecule){ print OUT "$pdb\t$chain\t$molecule\n"; }
## Molecules might not be defined if engineered
else { print OUT "$pdb\t$chain\tundefined molecule\n"; }
binmode PDB, ":gzip(none)";

## Printing title
print OUT "$pdb\tTITLE\t$title\n";

## Printing chain(s)
foreach my $id (sort (keys %molecules)){

my $molecule;
my $chains;

if ($molecules{$id} =~ /MOLECULE: (.*?);/){
$molecule = $1;
}
elsif($molecules{$id} =~ /MOLECULE: (.*?)/){
$molecule = $1;
## If at the end of the COMPND section, there is no semicolon after the molecule(s)
}

if ($molecules{$id} =~ /CHAIN: (.*?);/){
$chains = $1;
}
elsif ($molecules{$id} =~ /CHAIN: (.*?)/){
$chains = $1;
## If at the end of the COMPND section, there is no semicolon after the chain(s)
}

$chains =~ s/ //g;
my @chains = split (",", $chains);
foreach my $chain (@chains){
if ($molecule){ print OUT "$pdb\t$chain\t$molecule\n"; }
## Molecules might not be defined if engineered
else { print OUT "$pdb\t$chain\tundefined molecule\n"; }
}
}
}
}
Expand Down
11 changes: 7 additions & 4 deletions Homology_search/parse_all_models_by_Q.pl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
my $version = '0.1a';
my $updated = '2021-09-13';

use strict; use warnings; use Getopt::Long qw(GetOptions); use File::Basename;
use strict;
use warnings;
use Getopt::Long qw(GetOptions);
use File::Basename;

my $usage = <<"USAGE";
NAME ${name}
Expand All @@ -14,16 +17,16 @@
from best Q-score to worst.
EXAMPLE ${name} \\
-m *.GESAMT_per_model.matches \\
-m *_GESAMT_per_model.matches \\
-o All_GESAMT_matches_per_protein.tsv \\
-x 50
GENERAL OPTIONS:
-m (--matches) *.GESAMT.matches generated by descriptive_GESAMT_matches.pl
-o (--out) Output file in TSV format [Default: All_GESAMT_matches_per_protein.tsv]
-x (--max) Max number of distinct RCSB/chain hits to keep [Default: 50]
-r (--redun) Keep all entries for redundant RCSB chains [Default: off]
-w (--word) Use word regular expression (\\w+) to capture locus tag [Default: off]
-r (--redun) Keep all entries for redundant RCSB chains [Default: off]
-w (--word) Use word regular expression (\\w+) to capture locus tag [Default: off]
USAGE
die "\n$usage\n" unless @ARGV;

Expand Down
4 changes: 3 additions & 1 deletion Homology_search/update_PDB.pl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
my $name = 'update_PDB.pl';
my $updated = '2021-05-17';

use strict; use warnings; use Getopt::Long qw(GetOptions);
use strict;
use warnings;
use Getopt::Long qw(GetOptions);

## Usage definition
my $USAGE = <<"OPTIONS";
Expand Down
4 changes: 3 additions & 1 deletion Misc_tools/rename_files.pl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
my $name = 'rename_files.pl';
my $updated = '2021-03-12';

use strict; use warnings; use Getopt::Long qw(GetOptions);
use strict;
use warnings;
use Getopt::Long qw(GetOptions);

## Usage definition
my $USAGE = <<"OPTIONS";
Expand Down
5 changes: 4 additions & 1 deletion Misc_tools/split_Fasta.pl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
my $name = 'split_Fasta.pl';
my $updated = '2021-09-07';

use strict; use warnings; use PerlIO::gzip; use Getopt::Long qw(GetOptions);
use strict;
use warnings;
use Getopt::Long qw(GetOptions);
use PerlIO::gzip;

## Usage definition
my $USAGE = <<"OPTIONS";
Expand Down
6 changes: 4 additions & 2 deletions Misc_tools/split_PDB.pl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
my $name = 'split_PDB.pl';
my $updated = '2021-04-06';

use strict; use warnings;
use PerlIO::gzip; use File::Basename;
use strict;
use warnings;
use PerlIO::gzip;
use File::Basename;
use Getopt::Long qw(GetOptions);

## Usage definition
Expand Down
46 changes: 46 additions & 0 deletions Notes/Installation_notes.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
## On Fedora 34
## Installation notes for running GPU-enabled Docker containers.
## These are step-by-step notes, not an unattended script: one step opens an
## interactive editor and another (newgrp) starts a new shell.

##### Installing Aria2, Conda and Docker #####
sudo dnf install aria2 conda docker

## Starting Docker / enabling at boot
sudo systemctl start docker
sudo systemctl enable docker.service
sudo systemctl enable containerd.service

## Testing docker
sudo docker run hello-world

## Creating docker group + add user(s) to it + activate docker group changes
## -f: succeed even if the docker group already exists
sudo groupadd -f docker
sudo usermod -aG docker "$USER"
## newgrp starts a new shell with the docker group active; run the remaining
## commands from that shell (or log out and back in instead)
newgrp docker

## Installing NVIDIA docker container - https://www.if-not-true-then-false.com/2020/install-nvidia-container-toolkit-on-fedora/
sudo wget -O /etc/yum.repos.d/inttf.repo https://rpms.if-not-true-then-false.com/inttf.repo
sudo dnf install nvidia-docker2

## Modifying configuration file (interactive edit)
sudo nano /etc/nvidia-container-runtime/config.toml
# Remove comment from 'no-cgroups = false' and change to 'no-cgroups = true'
# Under [nvidia-container-runtime]
# Remove comment (enable): debug = "/var/log/nvidia-container-runtime.log"

## Restarting Docker and checking that nvidia configuration is working
## Restarting a system service requires root privileges, hence sudo
sudo systemctl restart docker
nvidia-container-cli info
docker run \
    --privileged \
    --gpus all \
    --rm nvidia/cuda:11.1-base \
    nvidia-smi

## Make sure to add --privileged to prevent 'Failed to initialize NVML: Unknown Error'
## Make sure the user(s) running docker were added to the docker group
## (see the groupadd/usermod step above); otherwise prefix docker with sudo

## Testing capabilities with sample
docker run \
    --privileged \
    --gpus all \
    --rm nvcr.io/nvidia/k8s/cuda-sample:nbody nbody \
    -benchmark \
    -numbodies=512000
Loading

0 comments on commit f0b44bf

Please sign in to comment.