Skip to content

Commit

Permalink
new tasks, preprocessing+postprocessing
Browse files Browse the repository at this point in the history
  • Loading branch information
sileod committed Jan 9, 2023
1 parent 6b1c455 commit b26a9bb
Show file tree
Hide file tree
Showing 3 changed files with 432 additions and 396 deletions.
4 changes: 3 additions & 1 deletion src/tasksource/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __map_to_target(x,fn=lambda x:None, target=None):
return x

def __call__(self,dataset, max_rows=None, max_rows_eval=None):
dataset = self.pre_process(dataset)
for k,v in zip(self.default_splits, self.splits):
if v and k!=v:
dataset[k]=dataset[v]
Expand All @@ -52,7 +53,7 @@ def __call__(self,dataset, max_rows=None, max_rows_eval=None):
and type(v)==str and k!=v)})
for k in self.to_dict().keys():
v=getattr(self, k)
if callable(v) and k!="post_process":
if callable(v) and k not in {"post_process","pre_process"}:
dataset=dataset.map(self.__map_to_target,
fn_kwargs={'fn':v,'target':k})

Expand Down Expand Up @@ -164,6 +165,7 @@ class SharedFields:
splits:list=Preprocessing.default_splits
dataset_name:str = None
config_name:str = None
pre_process: callable = lambda x:x
post_process: callable = lambda x:x

@dataclass
Expand Down
29 changes: 25 additions & 4 deletions src/tasksource/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@
###################### NLI/paraphrase ###############################


# Adversarial NLI (ANLI): one task per adversarial collection round.
# Each round ships its own train/dev/test splits on the Hub, remapped
# onto the default split names via the `splits` argument.
anli__a1 = Classification('premise','hypothesis','label', splits=['train_r1','dev_r1','test_r1'])
anli__a2 = Classification('premise','hypothesis','label', splits=['train_r2','dev_r2','test_r2'])
anli__a3 = Classification('premise','hypothesis','label', splits=['train_r3','dev_r3','test_r3'])


# bAbI recast as NLI. The config list is fetched dynamically from the Hub,
# minus "agents-motivations" (see trailing comment).
babi_nli = Classification("premise", "hypothesis", "label",
dataset_name="metaeval/babi_nli",
config_name=set(get_dataset_config_names("metaeval/babi_nli"))-{"agents-motivations"}
) # agents-motivations task is not as clear-cut as the others

anli__a1 = Classification('premise','hypothesis','label', splits=['train_r1','dev_r1','test_r1'])
anli__a2 = Classification('premise','hypothesis','label', splits=['train_r2','dev_r2','test_r2'])
anli__a3 = Classification('premise','hypothesis','label', splits=['train_r3','dev_r3','test_r3'])

# SICK sentence pairs: a categorical entailment task on `label` and a
# separate task targeting the continuous `relatedness_score` column.
sick__label = Classification('sentence_A','sentence_B','label')
sick__relatedness = Classification('sentence_A','sentence_B','relatedness_score')
Expand Down Expand Up @@ -308,7 +310,20 @@ def split_choices(s):
######################## Classification (other) ########################

# Utilitarianism comparisons: single-input classification over `comparison`.
# Fixed: the dataset_name was previously an empty string (and the flattened
# diff left a duplicated continuation line); point at the Hub dataset.
utilitarianism = Classification("comparison",labels="label",
dataset_name="metaeval/utilitarianism")

# Counterfactual-statement detection on Amazon reviews (English config only).
amazon_counterfactual = Classification(
"text", labels="label",
dataset_name="mteb/amazon_counterfactual",
config_name="en")

# Insincere-question detection (SetFit packaging of the Quora data).
insincere_questions = Classification(
"text", labels="label",
dataset_name="SetFit/insincere-questions")

# Toxicity detection on conversation text (SetFit packaging).
toxic_conversations = Classification(
"text", labels="label",
dataset_name="SetFit/toxic_conversations")

turingbench = Classification("Generation",labels="label",
dataset_name="turingbench/TuringBench",
Expand Down Expand Up @@ -378,6 +393,10 @@ def split_choices(s):
"persuasiveness-eloquence", "persuasiveness-premisetype", "persuasiveness-relevance", "persuasiveness-specificity",
"persuasiveness-strength", "sarcasm","stac"])

# SILICONE dialogue-act / emotion configs. "swda" and "mrda" are deliberately
# excluded here because they are already covered by pragmeval (see comment).
# Fixed: "Uterance" -> "Utterance"; the SILICONE dataset's text column is
# named "Utterance", so the typo would break the column mapping.
silicone = Classification("Utterance",labels="Label",
config_name=['dyda_da', 'dyda_e', 'iemocap', 'maptask', 'meld_e', 'meld_s', 'oasis', 'sem'] # +['swda', 'mrda'] # in pragmeval
)

# LexGLUE: the two ECtHR tasks are kept disabled (documents too long).
#lex_glue___ecthr_a = Classification(sentence1="text", labels="labels") # too long
#lex_glue___ecthr_b = Classification(sentence1="text", labels="labels") # too long
lex_glue___eurlex = Classification(sentence1="text", labels="labels")
Expand Down Expand Up @@ -591,6 +610,8 @@ def split_choices(s):
# Sarcastic vs. genuine news headlines; binary target `is_sarcastic`.
sarcasm_news = Classification("headline", labels="is_sarcastic",
dataset_name="raquiba/Sarcasm_News_Headline")

# SemEval-2010 Task 8: classify the relation expressed in a sentence.
# NOTE(review): no dataset_name given — presumably derived from the variable
# name by the library; verify against Preprocessing's name-resolution logic.
sem_eval_2010_task_8 = Classification("sentence",labels="relation")

###END
################### END OF SUPPORT ######################

Expand Down
Loading

0 comments on commit b26a9bb

Please sign in to comment.