Skip to content

Commit

Permalink
Added some temp logging
Browse files (browse the repository at this point in the history)
Signed-off-by: Daniel Egert <[email protected]>
  • Loading branch information
trias702 committed Jan 23, 2025
1 parent 7178c88 commit b93549f
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions nemo_aligner/algorithms/self_rewarding.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -86,7 +86,7 @@ def self_rewarding_custom_collate(batch, eos_id):
  "answers_only": answer_ids,
  "prompt_lengths": context_lengths,
  "combined_lengths": combined_lengths,
- "dataset_mask": batch[0]['metadata']['mask'] if 'metadata' in batch[0] else None,
+ "dataset_mask": batch[0]['metadata']['mask'] if 'metadata' in batch[0] else "",
  }

  return output
Expand Down Expand Up @@ -588,6 +588,8 @@ def get_rewards_meta(self, list_of_batches):
  reward_scores = [[] for _ in range(sum([len(b["prompt_lengths"]) for b in list_of_batches]))]
  reward_scores = []
  reward_responses, prompt_lengths, resp_lengths, is_end = self.get_generations(list_of_batches)
+ if torch.distributed.get_rank() == 0 and torch.distributed.get_rank() == parallel_state.get_data_parallel_src_rank():
+ print(f"*** META_PROMPT_AND_RESP [ {self.tokenizer.ids_to_text(reward_responses[0].tolist())} ]")
  batch_responses_str = []
  for t, s, e in zip(reward_responses, prompt_lengths.tolist(), resp_lengths.tolist()):
  response = self.tokenizer.ids_to_text(t[s:e].tolist())
Expand Down Expand Up @@ -1056,6 +1058,7 @@ def augment_dataloader(self, dataloader):
  orig_response_str = self.tokenizer.ids_to_text(
  cand_for_meta[1][cand_for_meta[2] : cand_for_meta[3]].tolist()
  )
+ norm_prompt_str, norm_response_str = self.normalise_prompt(orig_prompt_str, orig_response_str, buffer[0]["dataset_mask"])
  meta_batch = []
  for a, b in itertools.combinations(
  [self.tokenizer.ids_to_text(s[0][s[1] : s[2]].tolist()) for s in reward_tokens_raw], 2
Expand All @@ -1069,10 +1072,10 @@ def augment_dataloader(self, dataloader):
  a = re.sub("(?i)(?:Score|Points): ([0-9\.]+)", "", a)
  b = re.sub("(?i)(?:Score|Points): ([0-9\.]+)", "", b)
  meta_str_ab = self.meta_judge_template_fn(
- prompt=orig_prompt_str, response=orig_response_str, judgement_a=a, judgement_b=b
+ prompt=norm_prompt_str, response=norm_response_str, judgement_a=a, judgement_b=b
  )
  meta_str_ba = self.meta_judge_template_fn(
- prompt=orig_prompt_str, response=orig_response_str, judgement_a=b, judgement_b=a
+ prompt=norm_prompt_str, response=norm_response_str, judgement_a=b, judgement_b=a
  )
  meta_tokens_ab = self.model.tokenizer.text_to_ids(meta_str_ab)
  meta_tokens_ba = self.model.tokenizer.text_to_ids(meta_str_ba)
Expand Down

0 comments on commit b93549f

Please sign in to comment.