From a5f76438b917301dfe78e64bd921a7b8a6a88669 Mon Sep 17 00:00:00 2001 From: Damien Sileo Date: Wed, 3 Jan 2024 15:20:38 +0100 Subject: [PATCH] new tasks --- .../.ipynb_checkpoints/recast-checkpoint.py | 113 ++ .../.ipynb_checkpoints/tasks-checkpoint.py | 1106 +++++++++++++++++ src/tasksource/recast.py | 5 +- src/tasksource/tasks.py | 19 +- tasks.md | 893 ++++++------- 5 files changed, 1688 insertions(+), 448 deletions(-) create mode 100644 src/tasksource/.ipynb_checkpoints/recast-checkpoint.py create mode 100755 src/tasksource/.ipynb_checkpoints/tasks-checkpoint.py diff --git a/src/tasksource/.ipynb_checkpoints/recast-checkpoint.py b/src/tasksource/.ipynb_checkpoints/recast-checkpoint.py new file mode 100644 index 0000000..61a6952 --- /dev/null +++ b/src/tasksource/.ipynb_checkpoints/recast-checkpoint.py @@ -0,0 +1,113 @@ +import random +from datasets import DatasetDict, Dataset +from sorcery import dict_of +import string + +improper_labels =['recast/recast_kg_relations','linguisticprobing',"lex_glue/scotus",'lexical_relation_classification/ROOT09',"pragmeval/squinky","pragmeval/emobank",'pragmeval/persuasiveness'] +improper_labels += ['glue/stsb', 'sick/relatedness', 'joci', 'utilitarianism', 'amazon_counterfactual/en', 'toxic_conversations', 'ethos/multilabel', 'lex_glue/eurlex', 'lex_glue/unfair_tos', 'app_reviews', 'humicroedit/subtask-1', 'stackoverflow-questions', 'go_emotions/simplified', 'google_wellformed_query', 'has_part', 'blog_authorship_corpus/age', 'promptCoherence', 'Sarcasm_News_Headline', 'auditor_review/demo-org--auditor_review', 'Dynasent_Disagreement', 'Politeness_Disagreement', 'SBIC_Disagreement', 'SChem_Disagreement', 'Dilemmas_Disagreement', 'sts-companion', 'acceptability-prediction', 'chaos-mnli-ambiguity', 'headline_cause/en_simple', 'oasst1_dense_flat', 'civil_comments'] + +improper_labels += ['stsb_multi_mt','MLMA_hate_speech','icl-symbol-tuning-instruct','zero-shot-label-nli'] + +def render_options(options): + options = [f'"{x}"' for x in options] + return f"{', '.join(options[:-1])} or {options[-1]}" + +def render_classification(text,options,answer): + example = 'A→B' if text.startswith('A:') else 'the following' + inputs = f'With no explanation, label {example} with either {render_options(options)}.\n{text}' + targets = f"{answer}." + return dict_of(inputs,targets) + +def render_token_classification(tokens,options,labels): + prefix = f'With no explanation, label each line with {render_options(options)} preceded by ":".\n' + inputs = prefix+"\n".join(tokens) + targets = "\n".join([':'.join(x) for x in zip(tokens,labels)]) + return dict_of(inputs,targets) + +def render_multiple_choice(prompt, options, labels): + inputs=(prompt+'\n' if prompt else '') + letters = string.ascii_uppercase[:len(options)] + inputs=f'With no explanation, chose the best option from {render_options(letters)}. {inputs}' + for letter, option in zip(letters, options): + inputs+=f'\n{letter}: {option}' + targets = f'{letters[labels]}.' + return dict_of(inputs, targets) + +def negative_sample_options(y, labels,N=4): + if len(labels)0), + dataset_name="openai/webgpt_comparisons") + +synthetic_instruct = MultipleChoice('prompt', choices=['chosen', 'rejected'], + labels=constant(0), dataset_name="Dahoas/synthetic-instruct-gptj-pairwise") + +scruples = Classification("text",labels="binarized_label",dataset_name="metaeval/scruples") + +wouldyourather = MultipleChoice(constant('Most people would rather:'), choices=['option_a','option_b'], + labels= lambda x: int(x['votes_a']