Skip to content

Commit

Permalink
Adds query_multiple to jackhmmer.py
Browse files Browse the repository at this point in the history
  • Loading branch information
jnwei committed Dec 4, 2023
1 parent 7e2b36e commit 4958683
Showing 1 changed file with 23 additions and 14 deletions.
37 changes: 23 additions & 14 deletions openfold/data/tools/jackhmmer.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,17 +186,26 @@ def _query_chunk(
)

return raw_output

def query(self,
    input_fasta_path: str,
    max_sequences: Optional[int] = None
) -> Sequence[Mapping[str, Any]]:
    """Queries the database using a single input FASTA file.

    Wraps the path in a one-element list, forwards it together with
    ``max_sequences`` to ``query_multiple``, and returns the first entry
    of what that call produces.
    """
    per_input_results = self.query_multiple(
        [input_fasta_path], max_sequences
    )
    return per_input_results[0]

def query_multiple(self,
input_fasta_paths: str,
max_sequences: Optional[int] = None
) -> Sequence[Sequence[Mapping[str, Any]]]:
"""Queries the database using Jackhmmer."""
if self.num_streamed_chunks is None:
single_chunk_result = self._query_chunk(
input_fasta_path, self.database_path, max_sequences,
)
return [single_chunk_result]
single_chunk_results = []
for input_fasta_path in input_fasta_paths:
single_chunk_result = self._query_chunk(
input_fasta_path, self.database_path, max_sequences,
)
single_chunk_results.append(single_chunk_result)
return single_chunk_results

db_basename = os.path.basename(self.database_path)
db_remote_chunk = lambda db_idx: f"{self.database_path}.{db_idx}"
Expand All @@ -211,7 +220,7 @@ def query(self,

# Download the (i+1)-th chunk while Jackhmmer is running on the i-th chunk
with futures.ThreadPoolExecutor(max_workers=2) as executor:
chunked_output = []
chunked_outputs = [[] for _ in range(len(input_fasta_paths))]
for i in range(1, self.num_streamed_chunks + 1):
# Copy the chunk locally
if i == 1:
Expand All @@ -229,21 +238,21 @@ def query(self,

# Run Jackhmmer with the chunk
future.result()
chunked_output.append(
self._query_chunk(
input_fasta_path,
db_local_chunk(i),
max_sequences
for fasta_idx, input_fasta_path in enumerate(input_fasta_paths):
chunked_outputs[fasta_idx].append(
self._query_chunk(
input_fasta_path,
db_local_chunk(i),
max_sequences
)
)
)

# Remove the local copy of the chunk
os.remove(db_local_chunk(i))
future = next_future
# Do not set next_future for the last chunk so that this works
# even for databases with only 1 chunk
if(i < self.num_streamed_chunks):
future = next_future
if self.streaming_callback:
self.streaming_callback(i)
return chunked_output
return chunked_outputs

0 comments on commit 4958683

Please sign in to comment.