-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathZINC_libraryFromSubset.pl
70 lines (58 loc) · 1.93 KB
/
ZINC_libraryFromSubset.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/perl
#################################################################
# ZINC_libraryFromSubset.pl
#
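# Description: For every compound listed in a subset file, finds the
# matching .mol2 files in the ZINC library and writes "path smiles"
# lines to <prefix>.<YYYYMMDD>.db.
# The subset file should have one "smiles id" pair per line.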
#################################################################
use File::Path;
use File::Basename;
use File::Spec;
use Getopt::Long;
use strict;
use warnings;
use POSIX qw(strftime);
use File::Glob qw(bsd_glob);

#################################################################
# Command-line options
#   -subset FILE    subset file, one "smiles id" pair per line (required)
#   -prefix NAME    prefix of the output file NAME.YYYYMMDD.db (required)
#   -library DIR    library directory to search (defaults to $DEFAULT_LIBRARY)
#   -usual_only     skip mol2 files whose first numeric field is 0
#   -verbose        echo each "id smiles" pair as it is processed
#   -h              print the usage line and exit
#################################################################
my $DEFAULT_LIBRARY = "/home/abinkows/Zinc/Zinc-Library/";
my $USAGE = $0." -subset Subsets/23_t60.20090707.smi -prefix 23_t60 -library /Volumes/Alpha/Zinc/Zinc-Library/\n";

my ($SUBSET, $PREFIX, $HELP, $ALT_LIBRARY, $VERBOSE, $USUAL_ONLY);
GetOptions("subset=s" => \$SUBSET,
           "prefix=s" => \$PREFIX,
           "h!" => \$HELP,
           "library=s" => \$ALT_LIBRARY,
           "verbose!" => \$VERBOSE,
           "usual_only!" => \$USUAL_ONLY) or die($USAGE);

if ($HELP) {
    print $USAGE;
    exit 0;
}

# Both values are needed to name the input and the output file, so stop
# here before anything is created on disk.
die("Both -subset and -prefix are required\n".$USAGE)
    unless defined $SUBSET && length $SUBSET
        && defined $PREFIX && length $PREFIX;

# Local date stamp (YYYYMMDD) used in the output file name.
my $DATE = strftime('%Y%m%d', localtime);

my $ZINC_LIBRARY = (defined $ALT_LIBRARY && length $ALT_LIBRARY)
                 ? $ALT_LIBRARY
                 : $DEFAULT_LIBRARY;
die("ZINC_LIBRARY not found: $ZINC_LIBRARY") if !-d $ZINC_LIBRARY;
$ZINC_LIBRARY = File::Spec->rel2abs($ZINC_LIBRARY);

################################################################
my $DB_PATH = $PREFIX.".$DATE.db";

# Open the input first so that a bad -subset path does not leave an
# empty .db file behind.
open(my $subset_fh, '<', $SUBSET) or die("Couldn't open $SUBSET: $!");
open(my $out_fh, '>', $DB_PATH) or die("Couldn't open $DB_PATH: $!");

# Flush STDOUT on every print so the progress dots show up while the
# script is still running.
$| = 1;

my $lines_seen = 0;
while (my $line = <$subset_fh>) {
    chomp($line);
    my ($smiles, $id) = split(' ', $line);

    # Blank lines and lines with a single field carry no id to look up.
    next unless defined $id;

    print "$id $smiles\n" if $VERBOSE;

    # The files for an id live three directories deep; the directory names
    # are the two-character slices of the id at offsets 4, 6 and 8.  An id
    # too short to supply a slice yields an empty directory name.
    my ($subdir, $subsubdir, $subsubsubdir) =
        map { $_ < length($id) ? substr($id, $_, 2) : '' } (4, 6, 8);

    # bsd_glob does not split the pattern on whitespace, so library paths
    # containing spaces are handled.
    foreach my $path (bsd_glob("$ZINC_LIBRARY/$subdir/$subsubdir/$subsubsubdir/$id.*-*.mol2")) {
        # File names look like <id>.<level>-<n>.mol2; $level stays undef
        # when the name does not follow that pattern.
        my ($level) = basename($path) =~ /\A\Q$id\E\.(\d+)-\d+\.mol2\z/;

        # With -usual_only, keep only files whose level is known to be
        # non-zero.
        next if $USUAL_ONLY && (!defined $level || $level == 0);

        print {$out_fh} "$path $smiles\n";
    }
}
continue {
    # Runs for every input line, including skipped ones: one dot per
    # 100 lines read, starting with the first.
    print "." if ($lines_seen++ % 100 == 0);
}

close($subset_fh);
# Buffered write errors only surface at close time.
close($out_fh) or die("Couldn't close $DB_PATH: $!");