seq2seq: revert per-batch language assignment
stefanik12 committed Oct 17, 2024
1 parent c7a581d commit a56b937
Showing 2 changed files with 2 additions and 4 deletions.
1 change: 0 additions & 1 deletion adaptor/objectives/CLM.py
@@ -135,7 +135,6 @@ def _get_seq2seq_collated_iterator(self,
         # yield last nonempty residual batch
         yield self.collator(features_batch)
 
-
     def _compute_loss(self,
                       logit_outputs: torch.FloatTensor,
                       labels: torch.LongTensor,
5 changes: 2 additions & 3 deletions adaptor/objectives/seq2seq.py
@@ -34,10 +34,9 @@ def _get_seq2seq_collated_iterator(self,
         :return: Iterator of encoded batches.
         """
         features_batch = []
-
+        self.tokenizer.src_lang = self.source_lang_id
+        self.tokenizer.tgt_lang = self.target_lang_id
         for source_text, target_text in zip(source_texts, target_texts):
-            self.tokenizer.src_lang = self.source_lang_id
-            self.tokenizer.tgt_lang = self.target_lang_id
             sample_features = self.tokenizer(source_text, truncation=True)
 
             with self.tokenizer.as_target_tokenizer():
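
The substantive change is in seq2seq.py: the source/target language assignment on the tokenizer is lifted out of the per-sample loop and done once per call, since the language pair is fixed for the whole objective (the CLM.py hunk only removes a stray blank line). A minimal sketch of the reverted-to pattern, assuming a HuggingFace multilingual tokenizer such as MBart50TokenizerFast; the checkpoint name, language codes, and sample pair below are illustrative, not taken from the commit:

    # Minimal sketch, not the repository's code: set the language pair once,
    # before iterating over samples, as this commit reverts to doing.
    from transformers import MBart50TokenizerFast

    tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50")  # illustrative checkpoint
    tokenizer.src_lang = "en_XX"  # stands in for self.source_lang_id
    tokenizer.tgt_lang = "de_DE"  # stands in for self.target_lang_id

    for source_text, target_text in [("Hello.", "Hallo.")]:
        features = tokenizer(source_text, truncation=True)
        # tokenize the target with the target-language settings applied
        with tokenizer.as_target_tokenizer():
            features["labels"] = tokenizer(target_text, truncation=True)["input_ids"]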
