diff --git a/src/alphafold3/data/tools/jackhmmer.py b/src/alphafold3/data/tools/jackhmmer.py index 2b0fd368d98bcd41433dd39f621cd3f62fb5819b..d1771a3f2776a2227109878dbacea5a1b64fa1cd 100644 --- a/src/alphafold3/data/tools/jackhmmer.py +++ b/src/alphafold3/data/tools/jackhmmer.py @@ -124,20 +124,11 @@ class Jackhmmer(msa_tool.MsaTool): log_on_process_error=True, ) - # Parse sequences (to remove line breaks). with open(output_sto_path) as f: output_sto_str = f.read() - output_a3m_str = parsers.convert_stockholm_to_a3m(output_sto_str) - a3m = [] - for i, (seq, name) in enumerate( - parsers.lazy_parse_fasta_string(output_a3m_str) - ): - if i == self.max_sequences: - # Apply the maximum MSA depth limit. - logging.info('Limiting MSA depth to %d', self.max_sequences) - break - a3m.append(f'>{name}\n{seq}') - a3m = '\n'.join(a3m) + a3m = parsers.convert_stockholm_to_a3m( + output_sto_str, max_sequences=self.max_sequences + ) return msa_tool.MsaToolResult( target_sequence=target_sequence, a3m=a3m, e_value=self.e_value diff --git a/src/alphafold3/data/tools/nhmmer.py b/src/alphafold3/data/tools/nhmmer.py index a4b40896a85d910c08cd16a7ceb31d0619877576..e792699d908b3e53c251f45f153d8a96e4253e00 100644 --- a/src/alphafold3/data/tools/nhmmer.py +++ b/src/alphafold3/data/tools/nhmmer.py @@ -136,7 +136,9 @@ class Nhmmer(msa_tool.MsaTool): sto_out = f.read() if sto_out: - a3m_out = parsers.convert_stockholm_to_a3m(sto_out) + a3m_out = parsers.convert_stockholm_to_a3m( + sto_out, max_sequences=self._max_sequences - 1 # Query not included. + ) # Nhmmer hits are generally shorter than the query sequence. To get an MSA # of width equal to the query sequence, align hits to the query profile. logging.info('Aligning output a3m of size %d bytes', len(a3m_out)) @@ -152,14 +154,8 @@ class Nhmmer(msa_tool.MsaTool): ) a3m_out = ''.join([target_sequence_fasta, a3m_out]) - # Parse sequences (to remove line breaks). - a3m = [] - for i, (seq, name) in enumerate(parsers.lazy_parse_fasta_string(a3m_out)): - if i == self._max_sequences: - # Apply the maximum MSA depth limit. - logging.info('Limiting MSA depth to %d', self._max_sequences) - break - a3m.append(f'>{name}\n{seq}') + # Parse the output a3m to remove line breaks. + a3m = [f'>{n}\n{s}' for s, n in parsers.lazy_parse_fasta_string(a3m_out)] a3m = '\n'.join(a3m) else: # Nhmmer returns an empty file if there are no hits.