From a5f76438b917301dfe78e64bd921a7b8a6a88669 Mon Sep 17 00:00:00 2001
From: Damien Sileo <dsileo@magnet-magnet7.lille.inria.fr>
Date: Wed, 3 Jan 2024 15:20:38 +0100
Subject: [PATCH] new tasks

---
 .../.ipynb_checkpoints/recast-checkpoint.py   |  113 ++
 .../.ipynb_checkpoints/tasks-checkpoint.py    | 1106 +++++++++++++++++
 src/tasksource/recast.py                      |    5 +-
 src/tasksource/tasks.py                       |   19 +-
 tasks.md                                      |  893 ++++++-------
 5 files changed, 1688 insertions(+), 448 deletions(-)
 create mode 100644 src/tasksource/.ipynb_checkpoints/recast-checkpoint.py
 create mode 100755 src/tasksource/.ipynb_checkpoints/tasks-checkpoint.py

diff --git a/src/tasksource/.ipynb_checkpoints/recast-checkpoint.py b/src/tasksource/.ipynb_checkpoints/recast-checkpoint.py
new file mode 100644
index 0000000..61a6952
--- /dev/null
+++ b/src/tasksource/.ipynb_checkpoints/recast-checkpoint.py
@@ -0,0 +1,113 @@
+import random
+from datasets import DatasetDict, Dataset
+from sorcery import dict_of
+import string
+
+improper_labels =['recast/recast_kg_relations','linguisticprobing',"lex_glue/scotus",'lexical_relation_classification/ROOT09',"pragmeval/squinky","pragmeval/emobank",'pragmeval/persuasiveness']
+improper_labels += ['glue/stsb', 'sick/relatedness', 'joci', 'utilitarianism', 'amazon_counterfactual/en', 'toxic_conversations', 'ethos/multilabel', 'lex_glue/eurlex', 'lex_glue/unfair_tos', 'app_reviews', 'humicroedit/subtask-1', 'stackoverflow-questions', 'go_emotions/simplified', 'google_wellformed_query', 'has_part', 'blog_authorship_corpus/age', 'promptCoherence', 'Sarcasm_News_Headline', 'auditor_review/demo-org--auditor_review', 'Dynasent_Disagreement', 'Politeness_Disagreement', 'SBIC_Disagreement', 'SChem_Disagreement', 'Dilemmas_Disagreement', 'sts-companion', 'acceptability-prediction', 'chaos-mnli-ambiguity', 'headline_cause/en_simple', 'oasst1_dense_flat', 'civil_comments']
+
+improper_labels += ['stsb_multi_mt','MLMA_hate_speech','icl-symbol-tuning-instruct','zero-shot-label-nli']
+
+def render_options(options):  # render options as '"a", "b" or "c"'; a lone option is returned on its own and no options give ''
+    options = [f'"{x}"' for x in options]  # quote every option (a plain string of letters is accepted too)
+    return f"{', '.join(options[:-1])} or {options[-1]}" if len(options) > 1 else ''.join(options)
+
+def render_classification(text,options,answer):  # build the instruction prompt and the target text for one classification example
+    example = 'A→B' if text.startswith('A:') else 'the following'  # texts starting with "A:" are described as a pair in the prompt
+    inputs = f'With no explanation, label {example} with either {render_options(options)}.\n{text}'
+    targets = f"{answer}."  # the target is the answer followed by a period
+    return dict_of(inputs,targets)  # sorcery.dict_of — presumably keyed by the variable names 'inputs' and 'targets'; do not rename them
+
+def render_token_classification(tokens,options,labels):  # build a per-token labelling prompt and its target text
+    prefix = f'With no explanation, label each line with {render_options(options)} preceded by ":".\n'
+    inputs = prefix+"\n".join(tokens)  # instruction followed by one token per line
+    targets = "\n".join([':'.join(x) for x in zip(tokens,labels)])  # one "token:label" line per token
+    return dict_of(inputs,targets)  # sorcery.dict_of — presumably keyed by the variable names 'inputs' and 'targets'; do not rename them
+
+def render_multiple_choice(prompt, options, labels):  # render a lettered multiple-choice prompt; the target is the gold letter plus "."
+    letters = string.ascii_uppercase[:len(options)]  # one capital letter per option
+    header = f'With no explanation, chose the best option from {render_options(letters)}. '
+    body = prompt+'\n' if prompt else ''  # an empty prompt contributes nothing
+    listing = ''.join(f'\n{letter}: {option}' for letter, option in zip(letters, options))
+    inputs = header + body + listing
+    targets = f'{letters[labels]}.'  # labels is the index of the correct option
+    return dict_of(inputs, targets)
+
+def negative_sample_options(y, labels,N=4):  # y followed by N-1 random other labels; with fewer than N labels, return them all unchanged
+    if len(labels)<N:
+        return labels
+    distractors = [label for label in labels if label!=y]  # every label except the gold one
+    return [y]+random.sample(distractors, N-1)
+
+def shuffle_choices(x):  # shuffle the texts stored under the "choice*" keys of x in place and re-point x['labels'] at the gold text
+    keys = sorted(k for k in x if 'choice' in k)  # sorted as strings
+    texts = [x[k] for k in keys]
+    gold = texts[x['labels']]  # remember the gold text before shuffling
+    random.shuffle(texts)
+    for position, key in enumerate(keys):
+        x[key] = texts[position]
+    x["labels"] = texts.index(gold)  # first position that now holds the gold text
+    return x
+
+def recast_dataset_classification_to_mc(dataset,sep="[SEP]",N=4):  # recast every split of a classification dataset as N-way multiple choice
+
+    def recast_split(d,N=N):  # convert one split; N defaults to the outer N
+        labels = d.features['labels']
+        df=d.to_pandas()
+        df['inputs'] = df.sentence1
+        if "sentence2" in df:
+            df['inputs'] +=sep + df.sentence2  # join the two sentences with the separator
+
+        N=min(N, len(labels.names))  # never ask for more choices than there are label names
+        df['choices']=df.apply(lambda x:negative_sample_options(labels.int2str(x['labels']), labels.names,N),axis=1)     
+        df['labels']=df.apply(lambda x:x['choices'].index(labels.int2str(x['labels'])),axis=1)  # position of the gold label name inside the sampled choices
+
+        for i in range(N):
+            df[f'choice{i}']= "This example is " + df.choices.map(lambda x:x[i])  # one text column per choice
+
+        choices = [f'choice{i}' for i in range(N)]
+        return Dataset.from_pandas(df[['inputs',*choices,'labels']],preserve_index=False)
+
+    return DatasetDict({k: recast_split(v) for k,v in dataset.items()})
+
+
+def recast_instruct(dataset):  # rewrite every example as instruction-style 'inputs'/'targets' text; raises ValueError if the task type cannot be inferred
+    features = dataset['train'].features
+    labels = features['labels']
+
+    def recast_MultipleChoice(x):
+        x=shuffle_choices(x)
+        choices = sorted([k for k in x if 'choice' in k])
+        if all([x[c] in x['inputs'] for c in choices]):  # every option already appears in the prompt: answer with the option text
+            return {"inputs":x['inputs'], 'targets': x[f"choice{x['labels']}"].strip()+"."}
+        else:
+            return render_multiple_choice(x['inputs'],[x[c] for c in choices],x['labels'])
+
+    def recast_TokenClassification(x):
+        distractors = list(labels.feature.names)
+        x_labels = [labels.feature.int2str(y) for y in x['labels']]
+        labels_set= list({labels.feature.int2str(y) for y in x['labels']})
+        options=list(dict.fromkeys(labels_set+distractors))[:max(len(labels_set),10)]  # labels of this example first, then other names, capped
+        return render_token_classification(x['tokens'],options,x_labels)
+
+    def recast_Classification(x):
+        if 'sentence2' in x:
+            text=f"A: {x['sentence1']}\nB: {x['sentence2']}"
+        else:
+            text=x['sentence1']
+
+        answer=labels.int2str(x['labels']).strip()
+        options= negative_sample_options(answer, labels.names)  # public ClassLabel.names instead of the private _int2str
+        return render_classification(text, options, answer)
+
+    # ordered by increasing priority: when several marker columns are present, the last match wins
+    recasters = {"sentence1": recast_Classification, "choice0": recast_MultipleChoice, "tokens": recast_TokenClassification}
+    matches = [fn for column, fn in recasters.items() if column in features]
+    if not matches:
+        raise ValueError(f"cannot infer the task type from columns {list(features)}")
+
+    # explicit dispatch instead of eval() on a constructed function name
+    dataset = dataset.map(matches[-1])
+    dataset = dataset.remove_columns([k for k in features if k not in ['inputs','targets']])
+    return dataset
+ 
\ No newline at end of file
diff --git a/src/tasksource/.ipynb_checkpoints/tasks-checkpoint.py b/src/tasksource/.ipynb_checkpoints/tasks-checkpoint.py
new file mode 100755
index 0000000..8513a55
--- /dev/null
+++ b/src/tasksource/.ipynb_checkpoints/tasks-checkpoint.py
@@ -0,0 +1,1106 @@
+from .preprocess import cat, get, regen, name, constant, Classification, TokenClassification, MultipleChoice
+from .metadata import bigbench_discriminative_english, blimp_hard, imppres_presupposition, imppres_implicature, udep_en_configs, udep_en_labels
+from datasets import get_dataset_config_names, Sequence, ClassLabel, Dataset, DatasetDict
+
+# variable name: dataset___config__task
+
+###################### NLI/paraphrase ###############################
+
+glue___mnli = Classification(sentence1="premise", sentence2="hypothesis", labels="label", splits=["train", None, "validation_matched"])
+glue___qnli = Classification("question","sentence", labels="label")
+glue___rte = Classification(sentence1="sentence1", sentence2="sentence2", labels="label")
+glue___wnli = Classification(sentence1="sentence1", sentence2="sentence2", labels="label")
+#glue___ax = Classification(sentence1="premise", sentence2="hypothesis", labels="label", splits=["test", None, None]) # fully masked
+
+glue___mrpc = Classification(sentence1="sentence1", sentence2="sentence2", labels="label")
+glue___qqp = Classification(sentence1="question1", sentence2="question2", labels="label")
+glue___stsb = Classification(sentence1="sentence1", sentence2="sentence2", labels="label")
+
+super_glue___boolq = Classification(sentence1="question", labels="label")
+super_glue___cb = Classification(sentence1="premise", sentence2="hypothesis", labels="label")
+super_glue___multirc = Classification(
+    cat(["paragraph", "question"]),
+    'answer',
+    labels='label'
+)
+#super_glue___rte = Classification(sentence1="premise", sentence2="hypothesis", labels="label") # in glue
+super_glue___wic = Classification(
+    sentence1=cat(["word","sentence1"], " : "),
+    sentence2=cat(["word","sentence2"], " : "),
+    labels='label'
+)
+super_glue___axg = Classification(sentence1="premise", sentence2="hypothesis", labels="label", splits=["test", None, None])
+
+
+anli__a1 = Classification('premise','hypothesis','label', splits=['train_r1','dev_r1','test_r1'])
+anli__a2 = Classification('premise','hypothesis','label', splits=['train_r2','dev_r2','test_r2'])
+anli__a3 = Classification('premise','hypothesis','label', splits=['train_r3','dev_r3','test_r3'])
+
+
+babi_nli = Classification("premise", "hypothesis", "label",
+    dataset_name="metaeval/babi_nli",
+    config_name=set(get_dataset_config_names("metaeval/babi_nli"))-{"agents-motivations"}
+) # agents-motivations task is not as clear-cut as the others
+
+
+sick__label         = Classification('sentence_A','sentence_B','label')
+sick__relatedness   = Classification('sentence_A','sentence_B','relatedness_score')
+sick__entailment_AB = Classification('sentence_A','sentence_B','entailment_AB')
+#sick__entailment_BA = Classification('sentence_A','sentence_B','entailment_BA')
+
+def remove_neg_1(dataset):  # keep only the rows whose 'labels' value is not -1
+    return dataset.filter(lambda row: row['labels'] != -1)
+
+snli = Classification(sentence1="premise", sentence2="hypothesis", labels="label",
+    post_process=remove_neg_1)
+
+scitail = Classification("sentence1","sentence2","gold_label",config_name="snli_format")
+
+hans = Classification(sentence1="premise", sentence2="hypothesis", labels="label")
+
+wanli = Classification('premise','hypothesis','gold', dataset_name="alisawuffles/WANLI")
+
+recast_nli = Classification(sentence1="context", sentence2="hypothesis", labels="label", dataset_name="metaeval/recast",
+    config_name=['recast_kg_relations', 'recast_puns', 'recast_factuality', 'recast_verbnet',
+    'recast_verbcorner', 'recast_ner', 'recast_sentiment', 'recast_megaveridicality'])
+
+
+probability_words_nli = Classification(sentence1="context", sentence2="hypothesis", labels="label",
+    dataset_name="sileod/probability_words_nli", 
+    config_name=["reasoning_1hop","reasoning_2hop","usnli"])
+
+nan_nli = Classification("premise", "hypothesis", "label", dataset_name="joey234/nan-nli", config_name="joey234--nan-nli")
+
+nli_fever = Classification("premise","hypothesis","label",
+    dataset_name="pietrolesci/nli_fever", splits=["train","dev",None])
+
+breaking_nli = Classification("sentence1","sentence2","label",
+    dataset_name="pietrolesci/breaking_nli", splits=["full",None,None])
+
+conj_nli = Classification("premise","hypothesis","label",post_process=remove_neg_1,
+    dataset_name="pietrolesci/conj_nli",splits=['train','dev',None])
+
+fracas = Classification("premise","hypothesis","label",
+    dataset_name="pietrolesci/fracas")
+
+dialogue_nli = Classification("sentence1","sentence2","label",
+    dataset_name="pietrolesci/dialogue_nli")   
+
+mpe_nli = Classification("premise","hypothesis","label",
+    dataset_name="pietrolesci/mpe",
+    splits=["train","dev","test"])  
+
+dnc_nli = Classification("context","hypothesis","label",
+    dataset_name="pietrolesci/dnc")
+
+# gpt3_nli = Classification("text_a","text_b","label",dataset_name="pietrolesci/gpt3_nli") # not sound enough
+
+recast_white__fnplus = Classification("text","hypothesis","label",
+    dataset_name="pietrolesci/recast_white",splits=['fnplus',None,None])
+recast_white__sprl = Classification("text","hypothesis","label",
+    dataset_name="pietrolesci/recast_white",splits=['sprl',None,None])
+recast_white__dpr = Classification("text","hypothesis","label",
+    dataset_name="pietrolesci/recast_white",splits=['dpr',None,None])
+
+joci = Classification("context","hypothesis",
+    labels=lambda x: [None, "impossible", "technically possible", "plausible", "likely", "very likely"][x["original_label"]],
+    pre_process=lambda ds:ds.filter(lambda x:x['original_label']!=0),
+    dataset_name="pietrolesci/joci",splits=['full',None,None])
+
+#enfever_nli = Classification("evidence","claim","label", dataset_name="ctu-aic/enfever_nli")
+
+#contrast_nli = Classification("premise", "hypothesis",	"label",dataset_name="martn-nguyen/contrast_nli") # generated
+
+robust_nli__IS_CS = Classification("premise","hypothesis","label",
+	dataset_name="pietrolesci/robust_nli", splits=["IS_CS",None,None])
+robust_nli__LI_LI = Classification("premise","hypothesis","label",
+	dataset_name="pietrolesci/robust_nli", splits=["LI_LI",None,None])
+robust_nli__ST_WO = Classification("premise","hypothesis","label",
+	dataset_name="pietrolesci/robust_nli", splits=["ST_WO",None,None])
+robust_nli__PI_SP = Classification("premise","hypothesis","label",
+	dataset_name="pietrolesci/robust_nli", splits=["PI_SP",None,None])
+robust_nli__PI_CD = Classification("premise","hypothesis","label",
+	dataset_name="pietrolesci/robust_nli", splits=["PI_CD",None,None])
+robust_nli__ST_SE = Classification("premise","hypothesis","label",
+	dataset_name="pietrolesci/robust_nli", splits=["ST_SE",None,None])
+robust_nli__ST_NE = Classification("premise","hypothesis","label",
+	dataset_name="pietrolesci/robust_nli", splits=["ST_NE",None,None])
+robust_nli__ST_LM = Classification("premise","hypothesis","label",
+	dataset_name="pietrolesci/robust_nli", splits=["ST_LM",None,None])
+robust_nli_is_sd = Classification("premise","hypothesis","label",
+    dataset_name="pietrolesci/robust_nli_is_sd")
+robust_nli_li_ts = Classification("premise","hypothesis","label",
+    dataset_name="pietrolesci/robust_nli_li_ts")
+
+gen_debiased_nli__snli_seq_z = Classification("premise","hypothesis","label",
+	dataset_name="pietrolesci/gen_debiased_nli", splits=["snli_seq_z",None,None])
+gen_debiased_nli__snli_z_aug = Classification("premise","hypothesis","label",
+	dataset_name="pietrolesci/gen_debiased_nli", splits=["snli_z_aug",None,None])
+gen_debiased_nli__snli_par_z = Classification("premise","hypothesis","label",
+	dataset_name="pietrolesci/gen_debiased_nli", splits=["snli_par_z",None,None])
+gen_debiased_nli__mnli_par_z = Classification("premise","hypothesis","label",
+	dataset_name="pietrolesci/gen_debiased_nli", splits=["mnli_par_z",None,None])
+gen_debiased_nli__mnli_z_aug = Classification("premise","hypothesis","label",
+	dataset_name="pietrolesci/gen_debiased_nli", splits=["mnli_z_aug",None,None])
+gen_debiased_nli__mnli_seq_z = Classification("premise","hypothesis","label",
+	dataset_name="pietrolesci/gen_debiased_nli", splits=["mnli_seq_z",None,None])
+
+add_one_rte = Classification("premise","hypothesis","label",
+    dataset_name="pietrolesci/add_one_rte",splits=["train","dev","test"])
+
+def _imppres_post_process(ds,prefix=''):  # recast 'labels' to prefixed class names; with the default prefix the names start with "_"
+    # imppres entailment definition is either purely semantic or purely pragmatic
+    # because of that, we differentiate the labels from the anli/mnli notation
+    return ds.cast_column('labels', ClassLabel(
+    names=[f'{prefix}_entailment',f'{prefix}_neutral',f'{prefix}_contradiction']))
+
+imppres__presupposition = Classification("premise","hypothesis","gold_label",  # no extra alias: imppres__prag is defined by its own statement
+    dataset_name="metaeval/imppres", config_name=imppres_presupposition,
+    post_process=_imppres_post_process)
+
+imppres__prag = Classification("premise","hypothesis","gold_label_prag",
+    dataset_name="metaeval/imppres", config_name=imppres_implicature,
+    post_process=lambda x: _imppres_post_process(x,'pragmatic'))
+
+imppres__log = Classification("premise","hypothesis","gold_label_log",
+    dataset_name="metaeval/imppres", config_name=imppres_implicature,
+    post_process=lambda x: _imppres_post_process(x,'logical'))
+
+
+glue__diagnostics = Classification("premise","hypothesis","label",
+    dataset_name="pietrolesci/glue_diagnostics",splits=["test",None,None])
+
+hlgd = Classification("headline_a", "headline_b", labels="label")
+
+paws___labeled_final   = Classification("sentence1", "sentence2", name('label',['not_paraphrase','paraphrase']))
+paws___labeled_swap    = Classification("sentence1", "sentence2", name('label',['not_paraphrase','paraphrase']), splits=["train", None, None])
+#paws___unlabeled_final = Classification("sentence1", "sentence2", "label")
+
+#quora = Classification(get.questions.text[0], get.questions.text[1], 'is_duplicate') # in glue
+medical_questions_pairs = Classification("question_1","question_2", name("label",['False','True']))
+ 
+###################### Token Classification #########################
+
+conll2003__pos_tags   = TokenClassification(tokens="tokens", labels='pos_tags')
+conll2003__chunk_tags = TokenClassification(tokens="tokens", labels='chunk_tags')
+conll2003__ner_tags   = TokenClassification(tokens="tokens", labels='ner_tags')
+
+#tner___tweebank_ner    = TokenClassification(tokens="tokens", labels="tags")
+
+######################## Multiple choice ###########################
+
+anthropic_rlhf = MultipleChoice(constant(''), ['chosen','rejected'], constant(0),
+    dataset_name="Anthropic/hh-rlhf")
+
+model_written_evals = MultipleChoice('question', choices=['answer_matching_behavior','answer_not_matching_behavior'], labels=constant(0),  
+    dataset_name="Anthropic/model-written-evals")
+
+truthful_qa___multiple_choice = MultipleChoice(
+    "question",
+    choices_list=get.mc1_targets.choices,
+    labels=constant(0)
+)
+
+fig_qa = MultipleChoice(
+    "startphrase",
+    choices=["ending1","ending2"],
+    labels="labels",
+    dataset_name="nightingal3/fig-qa",
+    splits=["train","validation",None]
+)
+
+bigbench = MultipleChoice(
+    'inputs',
+    choices_list='multiple_choice_targets',
+    labels=lambda x:x['multiple_choice_scores'].index(1) if 1 in x['multiple_choice_scores'] else -1,  # -1 when no target is scored 1
+    dataset_name='tasksource/bigbench',
+    config_name=bigbench_discriminative_english - {"social_i_qa","intersect_geometry"} # english multiple choice tasks, minus duplicates
+)
+
+blimp_hard = MultipleChoice(inputs=constant(''),
+    choices=['sentence_good','sentence_bad'],
+    labels=constant(0),
+    dataset_name="blimp",
+    config_name=blimp_hard # tasks where GPT2 is at least 10% below  human accuracy
+)
+
+cos_e = MultipleChoice('question',
+    choices_list='choices',
+    labels= lambda x: x['choices_list'].index(x['answer']),
+    config_name='v1.0')
+
+cosmos_qa = MultipleChoice(cat(['context','question']),regen('answer[0-3]'),'label')
+
+dream = MultipleChoice(
+    lambda x:"\n".join(x['dialogue']+[x['question']]),
+    choices_list='choice',
+    labels=lambda x:x['choices_list'].index(x['answer'])
+)
+
+openbookqa = MultipleChoice(
+    'question_stem',
+    choices_list=get.choices.text,
+    labels='answerKey'
+)
+
+qasc = MultipleChoice(
+    'question',
+    choices_list=get.choices.text,
+    labels=lambda x: "ABCDEFGH".index(x['answerKey']),
+    splits=['train','validation',None]
+    
+)
+
+quartz = MultipleChoice(
+    'question',
+    choices_list=get.choices.text,
+    labels='answerKey'
+)
+quail = MultipleChoice(
+    cat(['context','question']),
+    choices_list='answers',
+    labels='correct_answer_id' 
+)
+
+head_qa___en = MultipleChoice("qtext",
+    choices_list = lambda x:[a['atext'] for a in x["answers"]],
+    labels = lambda x:[a['aid'] for a in x["answers"]].index(x["ra"])
+)
+
+
+sciq = MultipleChoice(
+    'question',
+    ['correct_answer']+regen('distractor[1-3]'),
+    labels=constant(0))
+
+social_i_qa = MultipleChoice(
+    'question',
+    ['answerA','answerB','answerC'],
+    'label')
+
+wiki_hop___original = MultipleChoice(
+    'question', 
+    choices_list='candidates',
+    labels=lambda x:x['choices_list'].index(x["answer"]))
+
+wiqa = MultipleChoice('question_stem',
+    choices_list = lambda x: x['choices']['text'],
+    labels='answer_label_as_choice')
+
+piqa = MultipleChoice('goal', choices=['sol1','sol2'], labels='label')
+
+hellaswag = MultipleChoice('ctx_a',
+    choices_list=lambda x: [f'{x["ctx_b"]}{e}' for e in x["endings"]],
+    labels='label', splits=['train','validation',None])
+
+super_glue___copa = MultipleChoice('premise',['choice1','choice2'],'label')
+
+balanced_copa = MultipleChoice('premise',['choice1','choice2'],'label',
+    dataset_name="pkavumba/balanced-copa")
+
+e_care = MultipleChoice('premise',['choice1','choice2'],'label',
+    dataset_name="12ml/e-CARE")
+
+art = MultipleChoice(cat(['observation_1','observation_2']),  # the two observations form the context
+    ['hypothesis_1','hypothesis_2'],  # the candidate hypotheses are the options to choose between
+    labels=lambda x:x['label']-1,  # shift the label by one to index the choices (assumes labels start at 1 — confirm against the dataset card)
+    splits=['train','validation',None]
+)
+
+
+mmlu = MultipleChoice('question',labels='answer',choices_list='choices',splits=['validation','dev','test'],
+    dataset_name="tasksource/mmlu",
+    config_name=get_dataset_config_names("tasksource/mmlu")
+)
+
+winogrande = MultipleChoice('sentence',['option1','option2'],'answer',config_name='winogrande_xl',
+    splits=['train','validation',None])
+
+codah = MultipleChoice('question_propmt',choices_list='candidate_answers',labels='correct_answer_idx',config_name='codah')
+
+ai2_arc__challenge = MultipleChoice('question',
+    choices_list=get.choices.text,  
+    labels=lambda x: get.choices.label(x).index(x["answerKey"]),
+    config_name=["ARC-Challenge","ARC-Easy"])
+
+definite_pronoun_resolution = MultipleChoice(
+    inputs=cat(["sentence","pronoun"],' : '),
+    choices_list='candidates',
+    labels="label",
+    splits=['train',None,'test'])
+
+swag___regular=MultipleChoice(cat(["sent1","sent2"]),regen("ending[0-3]"),"label")
+
+def _split_choices(s):  # split an options string such as "a ) 1 , b ) 2" into ['1', '2']
+    import re  # imported locally, inside the function
+    return [part.rstrip(', ') for part in re.split(r'[a-e] \) (.*?)',s) if part.strip(', ')]
+
+math_qa = MultipleChoice(
+    'Problem', 
+    choices_list = lambda x: _split_choices(x['options']),
+    labels = lambda x:'abcde'.index(x['correct'])   
+)
+
+#aqua_rat___tokenized = MultipleChoice("question",choices_list="options",labels=lambda x:"ABCDE".index(x['correct'])) in math_qa
+
+
+######################## Classification (other) ########################
+glue___cola = Classification(sentence1="sentence", labels="label")
+glue___sst2 = Classification(sentence1="sentence", labels="label")
+
+utilitarianism = Classification("comparison",labels="label",
+dataset_name="metaeval/utilitarianism")
+
+amazon_counterfactual = Classification(
+    "text", labels="label",
+    dataset_name="mteb/amazon_counterfactual",
+    config_name="en")
+
+insincere_questions = Classification(
+    "text", labels="label_text",
+    dataset_name="SetFit/insincere-questions")
+
+toxic_conversations = Classification(
+    "text", labels="label",
+    dataset_name="SetFit/toxic_conversations")
+
+turingbench = Classification("Generation",labels="label",
+    dataset_name="turingbench/TuringBench",
+    splits=["train","validation",None])
+
+
+trec = Classification(sentence1="text", labels="fine_label")
+
+tals_vitaminc = Classification('claim','evidence','label', dataset_name="tals/vitaminc", config_name="tals--vitaminc")
+
+hope_edi = Classification("text", labels="label", splits=["train", "validation", None], config_name=["english"])
+
+#fever___v1_0 = Classification(sentence1="claim", labels="label", splits=["train", "paper_dev", "paper_test"], dataset_name="fever", config_name="v1.0")
+#fever___v2_0 = Classification(sentence1="claim", labels="label", splits=[None, "validation", None], dataset_name="fever", config_name="v2.0")
+
+rumoureval_2019 = Classification(
+    sentence1="source_text",
+    sentence2=lambda x: str(x["reply_text"]),  # reply_text is coerced to a string
+    labels="label", dataset_name="strombergnlp/rumoureval_2019", config_name="RumourEval2019",
+    post_process=lambda ds:ds.filter(lambda x:x['labels'] is not None)  # drop rows without a label
+)
+
+ethos___binary = Classification(sentence1="text", labels="label", splits=["train", None, None])
+ethos___multilabel = Classification(
+    'text',
+    labels=lambda x: [x[c] for c in
+    ['violence', 'gender', 'race', 'national_origin', 'disability', 'religion', 'sexual_orientation','directed_vs_generalized']
+    ],
+    splits=["train", None, None]
+)
+
+tweet_eval = Classification(sentence1="text", labels="label",
+    config_name=["emoji", "emotion", "hate", "irony", "offensive", "sentiment"])
+
+def stance_kwargs(topic):  # keyword arguments for the tweet_eval stance config of one topic
+    prompt = constant(f'Topic: {topic}. \n Opinion:\n')  # fixed first sentence naming the topic
+    return dict(
+        sentence1=prompt,
+        sentence2="text",
+        labels="label",
+        config_name=f"stance_{topic.lower()}",  # config names use the lower-cased topic
+        dataset_name="tweet_eval")
+
+tweet_eval_abortion = Classification(**stance_kwargs("abortion"))
+tweet_eval_atheism  = Classification(**stance_kwargs("atheism"))
+tweet_eval_climate  = Classification(**stance_kwargs("climate"))
+tweet_eval_feminist = Classification(**stance_kwargs("feminist"))
+tweet_eval_hillary  = Classification(**stance_kwargs("Hillary"))
+
+    
+
+discovery = Classification("sentence1", "sentence2", labels="label", config_name=["discovery"])
+
+pragmeval_1 = Classification("sentence",labels="label",
+    dataset_name="pragmeval",
+    config_name= ["emobank-arousal", "emobank-dominance", "emobank-valence", "squinky-formality", "squinky-implicature", 
+    "squinky-informativeness","switchboard","mrda","verifiability"])
+
+pragmeval_2 = Classification("sentence1","sentence2",labels="label",
+    dataset_name="pragmeval",
+    config_name= ["emergent", "gum", "pdtb", "persuasiveness-claimtype", 
+    "persuasiveness-eloquence", "persuasiveness-premisetype", "persuasiveness-relevance", "persuasiveness-specificity", 
+    "persuasiveness-strength", "sarcasm","stac"])
+
+silicone = Classification("Utterance",labels="Label",
+    config_name=['dyda_da', 'dyda_e', 'iemocap', 'maptask', 'meld_e', 'meld_s', 'oasis', 'sem'] # +['swda', 'mrda'] # in pragmeval
+)
+
+#lex_glue___ecthr_a = Classification(sentence1="text", labels="labels") # too long
+#lex_glue___ecthr_b = Classification(sentence1="text", labels="labels") # too long
+lex_glue___eurlex = Classification(sentence1="text", labels="labels") 
+lex_glue___scotus = Classification(sentence1="text", labels="label")
+lex_glue___ledgar = Classification(sentence1="text", labels="label")
+lex_glue___unfair_tos = Classification(sentence1="text", labels="labels")
+lex_glue___case_hold = MultipleChoice("context", choices_list='endings', labels="label")
+
+language_identification = Classification("text",labels="labels", dataset_name="papluca/language-identification")
+
+################ Automatically generated (verified)##########
+
+imdb = Classification(sentence1="text", labels="label", splits=["train", None, "test"])
+
+#
+
+rotten_tomatoes = Classification(sentence1="text", labels="label")
+
+ag_news = Classification(sentence1="text", labels="label", splits=["train", None, "test"])
+
+yelp_review_full = Classification(sentence1="text", labels="label", splits=["train", None, "test"], config_name=["yelp_review_full"])
+
+financial_phrasebank = Classification(sentence1="sentence", labels="label", splits=["train", None, None],
+    config_name=["sentences_allagree"])
+
+poem_sentiment = Classification(sentence1="verse_text", labels="label")
+
+
+#emotion = Classification(sentence1="text", labels="label") # file not found
+
+dbpedia_14 = Classification(sentence1="content", labels="label", splits=["train", None, "test"], config_name=["dbpedia_14"])
+
+amazon_polarity = Classification(sentence1="content", labels="label", splits=["train", None, "test"], config_name=["amazon_polarity"])
+
+app_reviews = Classification("review", labels="star", splits=["train", None, None])
+
+# multi_nli = Classification(sentence1="premise", sentence2="hypothesis", labels="label", splits=["train", "validation_matched", None]) #glue
+
+hate_speech18 = Classification(sentence1="text", labels="label", splits=["train", None, None])
+
+sms_spam = Classification(sentence1="sms", labels="label", splits=["train", None, None])
+
+humicroedit___subtask_1 = Classification("original", "edit", labels="meanGrade", dataset_name="humicroedit", config_name="subtask-1")
+humicroedit___subtask_2 = Classification(
+    sentence1=cat(['original1','edit1'],' : '),
+    sentence2=cat(['original2','edit2'],' : '),
+    labels="label", dataset_name="humicroedit", config_name="subtask-2")
+
+snips_built_in_intents = Classification(sentence1="text", labels="label", splits=["train", None, None])
+
+banking77 = Classification(sentence1="text", labels="label", splits=["train", None, "test"])
+
+hate_speech_offensive = Classification(sentence1="tweet", labels="class", splits=["train", None, None])
+
+yahoo_answers_topics = Classification(
+    "question_title","question_content",labels="topic")
+
+stackoverflow_questions=Classification("title","body",labels="label",
+    dataset_name="pacovaldez/stackoverflow-questions")
+
+#hyperpartisan_news_detection___byarticle = Classification(sentence1="text", labels="hyperpartisan", splits=["train", None, None]) # files too heavy
+#hyperpartisan_news_detection___bypublisher = Classification(sentence1="text", labels="hyperpartisan", splits=["train","validation", None]) # files too heavy
+hyperpartisan_news = Classification("text",labels="label",dataset_name="zapsdcn/hyperpartisan_news")
+
+scierc = Classification("text",labels="label",dataset_name="zapsdcn/sciie")
+citation_intent = Classification("text",labels="label",dataset_name="zapsdcn/citation_intent")
+
+#go_emotions___raw = Classification(sentence1="text", splits=["train", None, None])
+go_emotions___simplified = Classification(sentence1="text", labels="labels")
+
+#boolq = Classification(sentence1="question", splits=["train", "validation", None]) # in superglue
+
+#ecthr_cases___alleged_violation_prediction = Classification(labels="labels", dataset_name="ecthr_cases", config_name="alleged-violation-prediction")
+#ecthr_cases___violation_prediction = Classification(labels="labels", dataset_name="ecthr_cases", config_name="violation-prediction")
+#   too long
+
+scicite = Classification(sentence1="string", labels="label",dataset_name="allenai/scicite")
+
+liar = Classification(sentence1="statement", labels="label")
+
+relbert_lexical_relation_classification = Classification(sentence1="head", sentence2="tail", labels="relation",
+ dataset_name="relbert/lexical_relation_classification",
+ config_name=["BLESS","CogALexV","EVALution","K&H+N","ROOT09"])
+
+
+metaeval_linguisticprobing = Classification("sentence", labels="label", dataset_name="metaeval/linguisticprobing", 
+    config_name=['subj_number',
+                'obj_number',
+                'past_present',
+                'sentence_length',
+                'top_constituents',
+                'tree_depth',
+                'coordination_inversion',
+                'odd_man_out',
+                'bigram_shift']#+['word_content'] #too many labels 
+)
+
+metaeval_crowdflower = Classification("text", labels="label",
+ splits=["train", None, None], dataset_name="metaeval/crowdflower",
+ config_name=['sentiment_nuclear_power',
+            'tweet_global_warming',
+            'airline-sentiment',
+            'corporate-messaging',
+            'economic-news',
+            'political-media-audience',
+            'political-media-bias',
+            'political-media-message',
+            'text_emotion']
+)
+
+metaeval_ethics___commonsense = Classification(sentence1="text", labels="label", dataset_name="metaeval/ethics", config_name="commonsense")
+metaeval_ethics___deontology = Classification(sentence1="text", labels="label", dataset_name="metaeval/ethics", config_name="deontology")
+metaeval_ethics___justice = Classification(sentence1="text", labels="label", dataset_name="metaeval/ethics", config_name="justice")
+metaeval_ethics___virtue = Classification(sentence1="sentence1", sentence2="sentence2", labels="label", dataset_name="metaeval/ethics", config_name="virtue")
+
+emo = Classification(sentence1="text", labels="label", splits=["train", None, "test"], config_name=["emo2019"])
+
+google_wellformed_query = Classification(sentence1="content", labels="rating")
+
+tweets_hate_speech_detection = Classification(sentence1="tweet", labels="label", splits=["train", None, None])
+
+#adv_glue___adv_sst2 = Classification(sentence1="sentence", labels="label", splits=["validation", None, None])
+#adv_glue___adv_qqp = Classification(sentence1="question1", sentence2="question2", labels="label", splits=["validation", None, None])
+#adv_glue___adv_mnli = Classification(sentence1="premise", sentence2="hypothesis", labels="label", splits=["validation", None, None])
+#adv_glue___adv_mnli_mismatched = Classification(sentence1="premise", sentence2="hypothesis", labels="label", splits=["validation", None, None])
+#adv_glue___adv_qnli = Classification(sentence1="question", labels="label", splits=["validation", None, None])
+#adv_glue___adv_rte = Classification(sentence1="sentence1", sentence2="sentence2", labels="label", splits=["validation", None, None])
+
+has_part = Classification("arg1","arg2", labels="score", splits=["train", None, None])
+
+wnut_17 = TokenClassification(tokens="tokens", labels="ner_tags", config_name=["wnut_17"])
+
+ncbi_disease = TokenClassification(tokens="tokens", labels="ner_tags", config_name=["ncbi_disease"])
+
+acronym_identification = TokenClassification(labels="labels", tokens="tokens")
+
+jnlpba = TokenClassification(tokens="tokens", labels="ner_tags", splits=["train", "validation", None], config_name=["jnlpba"])
+
+#species_800 = TokenClassification(tokens="tokens", labels="ner_tags", config_name=["species_800"]) missing files
+
+SpeedOfMagic_ontonotes_english = TokenClassification(tokens="tokens", labels="ner_tags", dataset_name="SpeedOfMagic/ontonotes_english", config_name="SpeedOfMagic--ontonotes_english")
+
+blog_authorship_corpus__gender    = Classification(sentence1="text",labels="gender")
+blog_authorship_corpus__age       = Classification(sentence1="text",labels="age")
+#blog_authorship_corpus__horoscope = Classification(sentence1="text",labels="horoscope")
+blog_authorship_corpus__job       = Classification(sentence1="text",labels="job")
+
+launch_open_question_type = Classification(sentence1="question", labels="resolve_type", dataset_name="launch/open_question_type")
+
+health_fact = Classification(sentence1="claim", labels="label",
+    pre_process = lambda ds:ds.filter(lambda x:x['label'] not in {-1})
+)
+
+commonsense_qa = MultipleChoice(
+    "question",
+    choices_list=get.choices.text,
+    labels=lambda x: "ABCDE".index(x["answerKey"]),
+    splits=["train","validation",None]
+)
+mc_taco = Classification(
+    lambda x: f'{x["sentence"]} {x["question"]} {x["answer"]}',
+    labels="label",
+    splits=[ "validation",None,"test"]
+)
+
+ade_corpus_v2___Ade_corpus_v2_classification = Classification("text",labels="label")
+
+discosense = MultipleChoice("context",choices=regen(r"option\_[0-3]"),labels="label",  # raw string: "\_" is not a valid str escape
+    dataset_name="prajjwal1/discosense")
+    
+circa = Classification(
+    sentence1=cat(["context","question-X"]),
+    sentence2="answer-Y",
+    labels="goldstandard2", post_process=remove_neg_1)
+
+#code_x_glue_cc_defect_detection = Classification("func", labels="target")
+
+#code_x_glue_cc_clone_detection_big_clone_bench = Classification("func1", "func2", "label") # in bigbench + too heavy (100g)
+
+#code_x_glue_cc_code_refinement = MultipleChoice(
+#    constant(""), choices=["buggy","fixed"], labels=constant(0),
+#    config_name="medium")
+
+#effective_feedback_student_writing = Classification("discourse_text", 
+#labels="discourse_effectiveness",dataset_name="YaHi/EffectiveFeedbackStudentWriting")
+# discontinued /!\
+
+#promptSentiment = Classification("text",labels="label",dataset_name="Ericwang/promptSentiment")
+#promptNLI = Classification("premise","hypothesis",labels="label",dataset_name="Ericwang/promptNLI")
+#promptSpoke = Classification("text",labels="label",dataset_name="Ericwang/promptSpoke")
+#promptProficiency = Classification("text",labels="label",dataset_name="Ericwang/promptProficiency")
+#promptGrammar = Classification("text",labels="label",dataset_name="Ericwang/promptGrammar")
+#promptCoherence = Classification("text",labels="label",dataset_name="Ericwang/promptCoherence")
+
+phrase_similarity = Classification(
+    sentence1=cat(["phrase1","sentence1"], " : "),
+    sentence2=cat(["phrase2","sentence2"], " : "),
+    labels='label',
+    dataset_name="PiC/phrase_similarity"
+)
+
+exaggeration_detection = Classification(
+    sentence1="press_release_conclusion",
+    sentence2="abstract_conclusion",
+    labels="exaggeration_label", 
+    dataset_name="copenlu/scientific-exaggeration-detection"
+)
+quarel = Classification(
+    "question",
+    labels=lambda x: "AB"[x["answer_index"]]
+)
+
+mwong_fever_evidence_related = Classification(sentence1="claim", sentence2="evidence", labels="labels", splits=["train", "valid", "test"], dataset_name="mwong/fever-evidence-related", config_name="mwong--fever-related")
+
+numer_sense = Classification("sentence",labels="target",splits=["train",None,None])
+
+dynasent__r1 = Classification("sentence", labels="gold_label", 
+    dataset_name="dynabench/dynasent", config_name="dynabench.dynasent.r1.all")
+dynasent__r2 = Classification("sentence", labels="gold_label", 
+    dataset_name="dynabench/dynasent", config_name="dynabench.dynasent.r2.all")
+
+sarcasm_news = Classification("headline", labels="is_sarcastic",
+    dataset_name="raquiba/Sarcasm_News_Headline")
+
+sem_eval_2010_task_8 = Classification("sentence",labels="relation")
+
+demo_org_auditor_review = Classification(sentence1="sentence", labels="label", splits=["train", None, "test"], dataset_name="demo-org/auditor_review", config_name="demo-org--auditor_review")
+
+medmcqa = MultipleChoice("question", choices=regen('op[a-d]'),labels='cop')
+
+
+dynasent_disagreement    = Classification("text", labels="binary_disagreement", dataset_name="RuyuanWan/Dynasent_Disagreement")
+politeness_disagreement  = Classification("text", labels="binary_disagreement", dataset_name="RuyuanWan/Politeness_Disagreement")
+sbic_disagreement        = Classification("text", labels="binary_disagreement", dataset_name="RuyuanWan/SBIC_Disagreement")
+schem_disagreement       = Classification("text", labels="binary_disagreement", dataset_name="RuyuanWan/SChem_Disagreement")
+dilemmas_disagreement    = Classification("text", labels="binary_disagreement", dataset_name="RuyuanWan/Dilemmas_Disagreement")
+
+logiqa = MultipleChoice(
+    cat(["context","query"]),
+    choices_list = 'options',
+    labels = "correct_option",
+    dataset_name="lucasmccabe/logiqa"
+)
+
+#proto_qa = MultipleChoice(
+#    "question",
+#    choices_list=lambda x:x['answer-clusters']['answers'],
+#    labels=lambda x: x['answer-clusters']['count'].index(max(x['answer-clusters']['count'])),
+#    config_name='proto_qa'
+#)
+
+wiki_qa = Classification("question","answer", name("label",['False','True']))
+
+cycic_classification = Classification("question",labels=name("correct_answer",['False','True']),
+    dataset_name = "metaeval/cycic_classification")
+cycic_mc = MultipleChoice("question", choices=regen(r'answer\_option[0-4]'), labels="correct_answer",  # raw string: "\_" is not a valid str escape
+    dataset_name = "metaeval/cycic_multiplechoice")
+
+
+def _preprocess_chatgpt_detection(ex):  # randomly keep the human or the ChatGPT answer and record which one in ex['label']
+    import random
+    ex['label'] = int(random.random() < 0.5)  # coin flip: 0 keeps the human answer, 1 keeps the ChatGPT answer
+    human, chatgpt = str(ex['human_answers'][0]), str(ex['chatgpt_answers'][0])  # first answer of each kind, as text
+    ex['answer'] = chatgpt if ex['label'] else human
+    return ex
+
+#chatgpt_detection = Classification("question","answer","label",
+#    dataset_name = 'Hello-SimpleAI/HC3', config_name="all",
+#    pre_process=lambda dataset:dataset.map(_preprocess_chatgpt_detection))
+
+sts_companion = Classification("sentence1","sentence2","label",
+    dataset_name="metaeval/sts-companion")
+
+commonsense_qa_2 = Classification("question",labels="answer",
+    dataset_name="metaeval/commonsense_qa_2.0")
+
+ling_nli = Classification("premise","hypothesis","label",dataset_name="metaeval/lingnli")
+
+monotonicity_entailment = Classification("sentence1", "sentence2", "gold_label",    
+    dataset_name="metaeval/monotonicity-entailment")
+
+arct = MultipleChoice(cat(["reason","claim"]),choices=["warrant0","warrant1"],
+    labels="correctLabelW0orW1", dataset_name="metaeval/arct")
+
+scinli = Classification("sentence1", "sentence2", labels="label",
+    post_process=lambda x:x.shuffle(seed=0),
+    dataset_name="metaeval/scinli")
+
+naturallogic = Classification(" sent1 "," sent2 "," new_label ",dataset_name="metaeval/naturallogic")
+
+onestop_qa = MultipleChoice(cat(["paragraph","question"]),choices_list="answers",
+    labels=constant(0))
+
+moral_stories = MultipleChoice(cat(["situation","intention"]),
+    choices=['moral_action',"immoral_action"],labels=constant(0),
+    dataset_name="demelin/moral_stories", config_name="full")
+
+prost = MultipleChoice(cat(["context","ex_question"]), choices=['A','B','C','D'],labels="label",
+    dataset_name="corypaik/prost")
+
+dyna_hate = Classification("text",labels="label",dataset_name="aps/dynahate",splits=['train',None,None])
+
+syntactic_augmentation_nli = Classification('sentence1',"sentence2","gold_label",dataset_name="metaeval/syntactic-augmentation-nli")
+
+autotnli = Classification("premises", "hypothesis", "label", dataset_name="metaeval/autotnli")
+#equate = Classification("sentence1", "sentence2", "gold_label",dataset_name="metaeval/equate")
+
+conqada = Classification("sentence1","sentence2","label",dataset_name="lasha-nlp/CONDAQA",
+    pre_process = lambda ds:ds.filter(lambda x:x['label'] in {"DON'T KNOW","YES","NO"})
+)
+
+webgbpt_comparisons = MultipleChoice(get.question.full_text, choices=['answer_0','answer_1'],
+    labels=lambda x:int(x['score_1']>0),
+    dataset_name="openai/webgpt_comparisons")
+
+synthetic_instruct = MultipleChoice('prompt', choices=['chosen', 'rejected'],
+    labels=constant(0), dataset_name="Dahoas/synthetic-instruct-gptj-pairwise")
+
+scruples = Classification("text",labels="binarized_label",dataset_name="metaeval/scruples")
+
+wouldyourather = MultipleChoice(constant('Most people would rather:'), choices=['option_a','option_b'],
+    labels= lambda x: int(x['votes_a']<x['votes_b']),
+    dataset_name="metaeval/wouldyourather")
+
+attempto_nli = Classification("premise","hypothesis",
+    lambda x:f'race-{x["race_label"]}',
+    dataset_name="sileod/attempto-nli")
+
+defeasible_nli = Classification(cat(["Premise","Hypothesis"]),"Update",labels="UpdateType",
+    dataset_name="metaeval/defeasible-nli",config_name=['atomic', 'snli'])
+
+#defeasible_nli_social = Classification(cat(["SocialChemROT","Hypothesis"]),"Update",labels="UpdateType",
+#    dataset_name="metaeval/defeasible-nli",config_name='social')
+
+help_nli = Classification("ori_sentence","new_sentence","gold_label",
+    dataset_name="metaeval/help-nli")
+    
+nli_veridicality_transitivity = Classification("sentence1","sentence2","gold_label",
+    dataset_name="metaeval/nli-veridicality-transitivity")
+
+nl_satisfiability= Classification("sentence",labels="label",
+    dataset_name="metaeval/natural-language-satisfiability")
+
+lonli = Classification("premise","hypothesis","label",
+    dataset_name="metaeval/lonli")
+
+dadc_limit = Classification("sentence1","sentence2","label",
+    dataset_name="metaeval/dadc-limit-nli")
+
+flute = Classification("premise","hypothesis","label",
+    dataset_name="ColumbiaNLP/FLUTE")
+
+strategy_qa = Classification('question',labels='answer',
+    dataset_name="metaeval/strategy-qa",splits=['train',None,None])
+
+summarize_from_feedback = MultipleChoice(get.info.post,
+    choices_list=lambda x: [x['summaries'][0]['text'],x['summaries'][1]['text']],  # the two summaries being compared
+    labels="choice",
+    dataset_name="openai/summarize_from_feedback", config_name="comparisons",
+    pre_process = lambda ds:ds.filter(lambda x: isinstance(get.info.post(x), str))  # drop rows whose post is not a string
+)
+
+folio = Classification(lambda x: " ".join(x['premises']),"conclusion",
+    labels="label",
+    dataset_name="metaeval/folio")
+
+tomi_nli = Classification("premise","hypothesis","label",
+    dataset_name="metaeval/tomi-nli")
+
+avicenna = Classification("Premise 1","Premise 2","Syllogistic relation",
+    dataset_name="metaeval/avicenna")
+
+shp = MultipleChoice("history",
+    choices=['human_ref_A','human_ref_B'],
+    labels="labels",
+    dataset_name="stanfordnlp/SHP")
+
+medqa_usmle = MultipleChoice('sent1',choices=regen('ending[0-3]'),labels='label',
+    dataset_name="GBaker/MedQA-USMLE-4-options-hf")
+
+wikimedqa = MultipleChoice("text",choices=regen(r'option\_[0-7]'),labels='label',  # raw string: "\_" is not a valid str escape
+    dataset_name="sileod/wikimedqa",
+    config_name=["medwiki"])
+
+cicero = MultipleChoice(lambda x: " ".join(x['Dialogue']),
+    choices_list="Choices", labels=lambda x:x['Human Written Answer'][0],
+    dataset_name="declare-lab/cicero")
+
+creak = Classification("sentence",labels="label",
+    dataset_name='amydeng2000/CREAK')
+
+mutual = MultipleChoice("article",choices_list="options",
+    labels=lambda x: "ABCD".index(x['answers']),
+    dataset_name="metaeval/mutual",splits=["train",None,None])
+
+neqa = MultipleChoice('prompt',choices_list='classes',labels="answer_index",
+    dataset_name="inverse-scaling/NeQA")
+quote_repetition = MultipleChoice('prompt',choices_list='classes',labels="answer_index",
+    dataset_name="inverse-scaling/quote-repetition")
+redefine_math = MultipleChoice('prompt',choices_list='classes',labels="answer_index",
+    dataset_name="inverse-scaling/redefine-math")
+
+puzzte = Classification("puzzle_text","question","answer",
+    dataset_name="metaeval/puzzte")
+
+implicatures = MultipleChoice(cat(['context','response'],"\n"),
+    choices=['correct_implicature','incorrect_implicature'],
+    labels=constant(0),
+    dataset_name='metaeval/implicatures')
+
+race = MultipleChoice(cat(['question','article'],'\n'), choices_list='options',
+    labels=lambda x:'ABCDE'.index(x['answer']),
+    config_name=['middle','high'])
+
+race_c = MultipleChoice(cat(['question','article'],'\n'),choices_list='option',labels='label',
+    dataset_name='metaeval/race-c')
+
+spartqa_yn=Classification("story","question","answer",
+    dataset_name="metaeval/spartqa-yn")
+
+spartqa_mc=MultipleChoice(cat(["story","question"]),choices_list="candidate_answers",labels="answer",
+    dataset_name="metaeval/spartqa-mchoice")
+
+temporal_nli = Classification("Premise","Hypothesis","Label",
+    dataset_name="metaeval/temporal-nli")
+
+riddle_sense = MultipleChoice("question", choices_list=get.choices.text, 
+    labels=lambda x : "ABCDE".index(x['answerKey']))
+
+clcd = Classification(
+    "sentence1","sentence2","label",
+    dataset_name="metaeval/clcd-english")
+
+twentyquestions = Classification("question","subject","answer",dataset_name="maximedb/twentyquestions")
+
+reclor = MultipleChoice(cat(["context","question"]),choices_list="answers",labels="label",
+    dataset_name="metaeval/reclor",splits=['train','validation',None])
+
+c_aug_imdb = Classification("Text",labels="Sentiment",
+    dataset_name='metaeval/counterfactually-augmented-imdb')
+
+c_aug_snli = Classification("sentence1","sentence2","gold_label",
+    dataset_name='metaeval/counterfactually-augmented-snli')
+
+cnli = Classification("premise","hypothesis","label",
+    dataset_name='metaeval/cnli')
+
+perturbed_boolq = Classification("question",labels="hard_label",
+    dataset_name='metaeval/boolq-natural-perturbations')
+
+#mega_acceptability = Classification("sentence",labels="average",
+#    dataset_name='metaeval/mega-acceptability-v2')
+
+graded_acceptability = Classification("text",labels="normalized_score",
+    dataset_name="metaeval/acceptability-prediction")
+
+equate = Classification("sentence1","sentence2","gold_label",
+    dataset_name='metaeval/equate')
+
+science_qa = MultipleChoice("question",choices_list="choices",labels="answer",
+    dataset_name="metaeval/ScienceQA_text_only")
+
+ekar=MultipleChoice("question",choices_list=get.choices.text,
+    labels=lambda x:"ABCD".index(x['answerKey']),
+dataset_name="Jiangjie/ekar_english")
+
+implicit_hate = Classification("post",labels="class",
+    dataset_name="metaeval/implicit-hate-stg1")
+
+nli_unambiguity = Classification("premise","hypothesis","gini",
+    dataset_name="metaeval/chaos-mnli-ambiguity")
+
+headline_cause = Classification('left_title','right_title','label',
+    dataset_name='IlyaGusev/headline_cause',config_name='en_simple')
+
+logiqa_2 = Classification("premise","hypothesis","label",dataset_name="metaeval/logiqa-2.0-nli")
+
+_oasst = dict(dataset_name="tasksource/oasst1_dense_flat",
+    pre_process = lambda ds:ds.filter(lambda x:x['lang']=='en'))
+
+oasst1__quality = Classification("parent_text","text",labels="quality",**_oasst)
+oasst1__toxicity = Classification("parent_text","text",labels="toxicity",**_oasst)
+oasst1__helpfulness = Classification("parent_text","text",labels="helpfulness",**_oasst)
+
+para_rules = Classification("context","question",
+    labels=name("label",["False","True"]),
+    dataset_name="qbao775/PARARULE-Plus")
+
+mindgames = Classification("premise","hypothesis","label",dataset_name="sileod/mindgames")
+
+def _udep_post_process(ds):  # re-type the 'labels' column as a Sequence of ClassLabel whose names are udep_en_labels
+    return ds.cast_column('labels', Sequence(ClassLabel(names=udep_en_labels)))
+
+udep__deprel = TokenClassification('tokens',lambda x:[udep_en_labels.index(a) for a in x['deprel']],
+    config_name=udep_en_configs,dataset_name="universal_dependencies",post_process=_udep_post_process)
+
+ambient= Classification("premise","hypothesis","hypothesis_ambiguous",dataset_name="metaeval/ambient")
+
+path_naturalness = MultipleChoice(constant(""),choices=['choice1','choice2'],labels="label",
+    dataset_name="metaeval/path-naturalness-prediction")
+
+civil_comments__toxicity = Classification("text",labels="toxicity")
+civil_comments__severe_toxicity = Classification("text",labels="severe_toxicity")
+civil_comments__obscene = Classification("text",labels="obscene")
+civil_comments__threat = Classification("text",labels="threat")
+civil_comments__insult = Classification("text",labels="insult")
+civil_comments__identity_attack = Classification("text",labels="identity_attack")
+civil_comments__sexual_explicit = Classification("text",labels="sexual_explicit")
+
+cloth = MultipleChoice("sentence", choices_list=lambda x:[x["answer"]]+x["distractors"],labels=constant(0), dataset_name="AndyChiang/cloth")
+dgen  = MultipleChoice("sentence", choices_list=lambda x:[x["answer"]]+x["distractors"],labels=constant(0), dataset_name="AndyChiang/dgen")
+
+oasst_rlhf = MultipleChoice("prompt",choices=['chosen','rejected'],labels=constant(0),
+    dataset_name="tasksource/oasst1_pairwise_rlhf_reward")
+
+i2d2 = Classification("sentence1",labels=name('label',['False','True']), dataset_name="tasksource/I2D2")
+
+arg_me = Classification('argument','conclusion','stance', dataset_name="webis/args_me")
+valueeval_stance = Classification("Premise","Conclusion","Stance", dataset_name="webis/Touche23-ValueEval")
+starcon = Classification('argument','topic','label',dataset_name="tasksource/starcon")
+
+banking77 = Classification("text",labels="label",dataset_name="PolyAI/banking77")
+
+ruletaker = Classification("context","question","label",dataset_name="tasksource/ruletaker")
+
+lsat_qa = MultipleChoice(
+    cat(['passage','question']),
+    choices_list='references',labels="gold_index",
+     dataset_name="lighteval/lsat_qa",config_name="all")
+    
+control = Classification('premise','hypothesis',"label",dataset_name="tasksource/ConTRoL-nli")
+tracie = Classification("premise","hypothesis","answer",dataset_name='tasksource/tracie')
+sherliic = Classification("premise","hypothesis","label",dataset_name='tasksource/sherliic')
+
+sen_making__1 = MultipleChoice(constant('Chose most plausible:'), choices=['sentence0','sentence1'],labels='false', 
+    dataset_name="tasksource/sen-making")
+
+sen_making__2 = MultipleChoice(lambda x: [x['sentence0'],x['sentence1']][x['false']] + '\n is not plausible because :',
+    choices=['A','B','C'],labels=lambda x: 'ABC'.index(x['reason']), dataset_name="tasksource/sen-making")
+
+winowhy = Classification('sentence', lambda x: f'In "{x["wnli_sent1"]}", {x["wnli_sent2"]}',
+    labels=name('label',['False','True']), dataset_name="tasksource/winowhy")
+
+#for CFG in "cognitive-bias", "fake-news", "gender-bias", "hate-speech", "linguistic-bias", "political-bias", "racial-bias", "text-level-bias":
+#    print(f"mbib__{CFG.replace('-','_')} = Classification('text',labels=name('label',['not {CFG}','{CFG}']), dataset_name='mediabiasgroup/mbib-base', config_name='{CFG}')")
+
+mbib_cognitive_bias	= Classification('text',labels=name('label',['not cognitive-bias','cognitive-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='cognitive-bias')
+mbib_fake_news	= Classification('text',labels=name('label',['not fake-news','fake-news']), dataset_name='mediabiasgroup/mbib-base', config_name='fake-news')
+mbib_gender_bias	= Classification('text',labels=name('label',['not gender-bias','gender-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='gender-bias')
+mbib_hate_speech	= Classification('text',labels=name('label',['not hate-speech','hate-speech']), dataset_name='mediabiasgroup/mbib-base', config_name='hate-speech')
+mbib_linguistic_bias	= Classification('text',labels=name('label',['not linguistic-bias','linguistic-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='linguistic-bias')
+mbib_political_bias	= Classification('text',labels=name('label',['not political-bias','political-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='political-bias')
+mbib_racial_bias	= Classification('text',labels=name('label',['not racial-bias','racial-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='racial-bias')
+mbib_text_level_bias	= Classification('text',labels=name('label',['not text-level-bias','text-level-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='text-level-bias')
+
+robustLR = Classification("context","statement","label", dataset_name="tasksource/robustLR")
+
+cluttr = Classification("story","query", "target_text",dataset_name="CLUTRR/v1", config_name="gen_train234_test2to10")
+
+logical_fallacy = Classification("source_article", labels="logical_fallacies", dataset_name="tasksource/logical-fallacy")
+
+parade = Classification("Definition1","Definition2", labels=name('Binary labels',["not-paraphrase","paraphrase"]), dataset_name="tasksource/parade")
+
+cladder = Classification("given_info", "question", "answer",dataset_name="tasksource/cladder")
+
+subjectivity = Classification("Sentence",labels="Label",dataset_name="tasksource/subjectivity")
+
+moh   = Classification("context","expression","label", dataset_name="tasksource/MOH")
+vuac  = Classification("context","expression","label", dataset_name="tasksource/VUAC")
+trofi = Classification("context","expression","label", dataset_name="tasksource/TroFi", splits=['train',None,'test'])
+
+sharc_classification = Classification("snippet", lambda x:f'{x["scenario"]}\n{x["question"]}',
+    labels=lambda x:x["answer"] if x['answer'] in  {"Yes","No","Irrelevant"} else "Clarification needed",
+    dataset_name='sharc_modified',config_name='mod')
+
+conceptrules_v2 = Classification("context", "text", "label", dataset_name="tasksource/conceptrules_v2")
+
+scidtb = Classification("unit1_txt","unit2_txt","label", dataset_name="metaeval/disrpt",config_name='eng.dep.scidtb.rels')
+
+chunking = TokenClassification("tokens","chunk_tags", dataset_name="conll2000")
+
+few_nerd = TokenClassification("tokens","fine_ner_tags",dataset_name="DFKI-SLT/few-nerd",config_name='supervised')
+finer = TokenClassification('tokens','ner_tags',dataset_name='nlpaueb/finer-139')
+
+label_nli = Classification("premise","hypothesis","labels",dataset_name='tasksource/zero-shot-label-nli')
+
+com2sense = Classification("sent",labels="label",dataset_name="tasksource/com2sense",splits=['train',"validation",None])
+
+scone = Classification('sentence1_edited','sentence2_edited','gold_label_edited',dataset_name="tasksource/scone")
+
+winodict = MultipleChoice(cat(['definition','sentence']),['option1','option2'],'label',dataset_name='tasksource/winodict')
+
+fool_me_twice = Classification(
+    lambda x: " ".join(a['text'] for a in x['gold_evidence']),
+    'text', 'label', dataset_name='tasksource/fool-me-twice')
+
+monli = Classification("sentence1","sentence2","gold_label", dataset_name="tasksource/monli")
+
+causality = Classification('premise','hypothesis','relation', dataset_name='tasksource/corr2cause')
+
+lsat = MultipleChoice(cat(['passage','question']), choices_list='references',labels='gold_index',dataset_name='lighteval/lsat_qa',config_name='all')
+
+apt = Classification('text_a','text_b',name('labels',['not_paraphrase','paraphrase']),dataset_name='tasksource/apt')
+
+#xsum_factuality = Classification("summary",labels="is_factual")
+
+financial_sentiment = Classification("text",labels=name('label',['Bearish','Bullish','Neutral']),
+    dataset_name="zeroshot/twitter-financial-news-sentiment")
+
+def _icl_rand(x):  # return 0 or 1 from an RNG seeded with the first 50 chars of x['sentence1']
+    import random  # NOTE(review): assumes x already carries a 'sentence1' key when this runs - confirm against the caller
+    return random.Random(x['sentence1'][:50]).randint(0,1) # same seed -> same draw, so repeated calls on one example agree
+
+icl = Classification("inputs", lambda x: x['symbols'][_icl_rand(x)],
+    labels=lambda x: int(x['symbols'][_icl_rand(x)]==x['targets']),
+    dataset_name="tasksource/icl-symbol-tuning-instruct",
+    pre_process=lambda ds:ds.filter(lambda x:len(x['inputs'])<200*4), # 200 tokens of 4 char 
+    post_process=lambda ds:ds.cast_column('labels',ClassLabel(names=['False','True']))
+)
+
+space_nli = Classification("premises","hypothesis","label",dataset_name="tasksource/SpaceNLI")
+
+propsegment = Classification("hypothesis","premise",
+    labels = lambda x:{'n':'neutral','e':'entailment','c':'contradiction'}[x['label']],
+    dataset_name="sihaochen/propsegment",config_name='nli')
+
+hatemoji = Classification('text',labels=name("label_gold", ['not-hate-speech','hate-speech']),
+    dataset_name="HannahRoseKirk/HatemojiBuild")
+
+regset = Classification("context",labels="answer",dataset_name='tasksource/regset')
+
+esci = Classification('query','product_text','esci_label',
+    dataset_name="tasksource/esci",
+    pre_process=lambda ds:ds.filter(lambda x:x['product_locale']=='us'))
+
+def _preprocess_chatbot_arena(ds):  # filter rows, then flatten both conversations to text
+    ds=ds.filter(lambda row:row['winner'] in ["model_a","model_b"])  # keep rows whose winner is one of the two models
+    ds=ds.filter(lambda row:row['language']=="English")  # keep English rows only
+
+    def _unroll(x):
+        # Render each conversation (iterable of turns with 'role'/'content') as "role:\ncontent" blocks joined by newlines.
+        for side in ('conversation_a','conversation_b'):
+            x[side] = "\n".join([f"{turn['role']}:\n{turn['content']}" for turn in x[side]])
+        return x
+    ds=ds.map(_unroll)
+    return ds
+
+chatbot_arena = MultipleChoice(constant(""),
+    choices=["conversation_a","conversation_b"],
+    labels=lambda x: ["model_a","model_b"].index(x["winner"]),
+    dataset_name="lmsys/chatbot_arena_conversations",
+    pre_process=_preprocess_chatbot_arena)
+
+dnd_intent = Classification("examples",labels="label_names",
+    dataset_name='neurae/dnd_style_intents')
+
+fld = Classification("context","hypothesis", "proof_label",
+    dataset_name="hitachi-nlp/FLD.v2")
+
+sdoh_nli = Classification("premise","hypothesis",labels=lambda x:{True:"entailment",False:"not-entailment"}[x['label']],
+    dataset_name="tasksource/SDOH-NLI")
+
+scifact_entailment = Classification(lambda x:"\n".join(x["abstract"]),"claim",
+    labels=lambda x:x['verdict'].replace('NEI','NEUTRAL'),
+    dataset_name="allenai/scifact_entailment")
+
+feasibilityQA = Classification(cat(['knowledge','premise']),'hypothesis','binary_classification_label',
+    dataset_name="tasksource/feasibilityQA")
+                               
+simple_pair = Classification("premise","hypothesis","label", dataset_name="tasksource/simple_pair")
+adjective_scale_probe = Classification("premise","hypothesis","label", dataset_name="tasksource/AdjectiveScaleProbe-nli")
+repectively_nli = Classification("premise","hypothesis","label",dataset_name="tasksource/resnli")
+
diff --git a/src/tasksource/recast.py b/src/tasksource/recast.py
index f7e0778..61a6952 100644
--- a/src/tasksource/recast.py
+++ b/src/tasksource/recast.py
@@ -3,9 +3,10 @@
 from sorcery import dict_of
 import string
 
-improper_labels =['recast/recast_kg_relations','linguisticprobing',"lex_glue/scotus","pragmeval/squinky","pragmeval/emobank",'pragmeval/persuasiveness']
+improper_labels =['recast/recast_kg_relations','linguisticprobing',"lex_glue/scotus",'lexical_relation_classification/ROOT09',"pragmeval/squinky","pragmeval/emobank",'pragmeval/persuasiveness']
 improper_labels += ['glue/stsb', 'sick/relatedness', 'joci', 'utilitarianism', 'amazon_counterfactual/en', 'toxic_conversations', 'ethos/multilabel', 'lex_glue/eurlex', 'lex_glue/unfair_tos', 'app_reviews', 'humicroedit/subtask-1', 'stackoverflow-questions', 'go_emotions/simplified', 'google_wellformed_query', 'has_part', 'blog_authorship_corpus/age', 'promptCoherence', 'Sarcasm_News_Headline', 'auditor_review/demo-org--auditor_review', 'Dynasent_Disagreement', 'Politeness_Disagreement', 'SBIC_Disagreement', 'SChem_Disagreement', 'Dilemmas_Disagreement', 'sts-companion', 'acceptability-prediction', 'chaos-mnli-ambiguity', 'headline_cause/en_simple', 'oasst1_dense_flat', 'civil_comments']
-improper_labels += ['stsb_multi_mt','MLMA_hate_speech','icl-symbol-tuning-instruct']
+
+improper_labels += ['stsb_multi_mt','MLMA_hate_speech','icl-symbol-tuning-instruct','zero-shot-label-nli']
 
 def render_options(options):
     options = [f'"{x}"' for x in options]
diff --git a/src/tasksource/tasks.py b/src/tasksource/tasks.py
index 8513a55..4cd3283 100755
--- a/src/tasksource/tasks.py
+++ b/src/tasksource/tasks.py
@@ -1094,7 +1094,7 @@ def _unroll(x):
     dataset_name="tasksource/SDOH-NLI")
 
 scifact_entailment = Classification(lambda x:"\n".join(x["abstract"]),"claim",
-    labels=lambda x:x['verdict'].replace('NEI','NEUTRAL'),
+    labels=lambda x:x['verdict'].replace('NEI','NEUTRAL').lower(),
     dataset_name="allenai/scifact_entailment")
 
 feasibilityQA = Classification(cat(['knowledge','premise']),'hypothesis','binary_classification_label',
@@ -1104,3 +1104,20 @@ def _unroll(x):
 adjective_scale_probe = Classification("premise","hypothesis","label", dataset_name="tasksource/AdjectiveScaleProbe-nli")
 repectively_nli = Classification("premise","hypothesis","label",dataset_name="tasksource/resnli")
 
+spartun=MultipleChoice(cat(["story","question"]),choices_list="candidate_answers",
+    labels=lambda x: [c.lower() for c in x['choices_list']].index(x["answer"][0].lower()),
+    pre_process=lambda ds:ds.filter(lambda x:len(x['answer'])==1),
+    dataset_name="tasksource/SpaRTUN")
+
+resq=MultipleChoice(cat(["story","question"]),choices_list="candidate_answers",
+    labels=lambda x: [c.lower() for c in x['choices_list']].index(x["answer"][0].lower()),
+    pre_process=lambda ds:ds.filter(lambda x:len(x['answer'])==1),
+    dataset_name="tasksource/ReSQ")
+
+semantic_fragments_nli = Classification("sentence1","sentence2","gold_label",
+    dataset_name="tasksource/semantic_fragments_nli")
+
+moritz_zs_nli = Classification('text','hypothesis','labels',
+    pre_process=lambda ds:ds.filter(lambda x:x['task_name'] not in  ["mnli", "anli", "fevernli", "wanli", "lingnli"]),  # drop rows from these task_name values
+    dataset_name="MoritzLaurer/dataset_train_nli"
+)
diff --git a/tasks.md b/tasks.md
index 27e6657..dfb2c15 100644
--- a/tasks.md
+++ b/tasks.md
@@ -15,25 +15,25 @@
 |  12 | anli/a1                                                              | anli                                      |                                                     | a1              | anli__a1                                     | Classification      |
 |  13 | anli/a2                                                              | anli                                      |                                                     | a2              | anli__a2                                     | Classification      |
 |  14 | anli/a3                                                              | anli                                      |                                                     | a3              | anli__a3                                     | Classification      |
-|  15 | babi_nli/basic-induction                                             | metaeval/babi_nli                         | basic-induction                                     |                 | babi_nli                                     | Classification      |
-|  16 | babi_nli/time-reasoning                                              | metaeval/babi_nli                         | time-reasoning                                      |                 | babi_nli                                     | Classification      |
-|  17 | babi_nli/positional-reasoning                                        | metaeval/babi_nli                         | positional-reasoning                                |                 | babi_nli                                     | Classification      |
-|  18 | babi_nli/compound-coreference                                        | metaeval/babi_nli                         | compound-coreference                                |                 | babi_nli                                     | Classification      |
-|  19 | babi_nli/two-supporting-facts                                        | metaeval/babi_nli                         | two-supporting-facts                                |                 | babi_nli                                     | Classification      |
-|  20 | babi_nli/single-supporting-fact                                      | metaeval/babi_nli                         | single-supporting-fact                              |                 | babi_nli                                     | Classification      |
-|  21 | babi_nli/path-finding                                                | metaeval/babi_nli                         | path-finding                                        |                 | babi_nli                                     | Classification      |
-|  22 | babi_nli/basic-coreference                                           | metaeval/babi_nli                         | basic-coreference                                   |                 | babi_nli                                     | Classification      |
-|  23 | babi_nli/three-arg-relations                                         | metaeval/babi_nli                         | three-arg-relations                                 |                 | babi_nli                                     | Classification      |
-|  24 | babi_nli/conjunction                                                 | metaeval/babi_nli                         | conjunction                                         |                 | babi_nli                                     | Classification      |
-|  25 | babi_nli/three-supporting-facts                                      | metaeval/babi_nli                         | three-supporting-facts                              |                 | babi_nli                                     | Classification      |
-|  26 | babi_nli/yes-no-questions                                            | metaeval/babi_nli                         | yes-no-questions                                    |                 | babi_nli                                     | Classification      |
-|  27 | babi_nli/indefinite-knowledge                                        | metaeval/babi_nli                         | indefinite-knowledge                                |                 | babi_nli                                     | Classification      |
-|  28 | babi_nli/basic-deduction                                             | metaeval/babi_nli                         | basic-deduction                                     |                 | babi_nli                                     | Classification      |
-|  29 | babi_nli/counting                                                    | metaeval/babi_nli                         | counting                                            |                 | babi_nli                                     | Classification      |
-|  30 | babi_nli/simple-negation                                             | metaeval/babi_nli                         | simple-negation                                     |                 | babi_nli                                     | Classification      |
-|  31 | babi_nli/lists-sets                                                  | metaeval/babi_nli                         | lists-sets                                          |                 | babi_nli                                     | Classification      |
-|  32 | babi_nli/size-reasoning                                              | metaeval/babi_nli                         | size-reasoning                                      |                 | babi_nli                                     | Classification      |
-|  33 | babi_nli/two-arg-relations                                           | metaeval/babi_nli                         | two-arg-relations                                   |                 | babi_nli                                     | Classification      |
+|  15 | babi_nli/indefinite-knowledge                                        | metaeval/babi_nli                         | indefinite-knowledge                                |                 | babi_nli                                     | Classification      |
+|  16 | babi_nli/size-reasoning                                              | metaeval/babi_nli                         | size-reasoning                                      |                 | babi_nli                                     | Classification      |
+|  17 | babi_nli/counting                                                    | metaeval/babi_nli                         | counting                                            |                 | babi_nli                                     | Classification      |
+|  18 | babi_nli/two-supporting-facts                                        | metaeval/babi_nli                         | two-supporting-facts                                |                 | babi_nli                                     | Classification      |
+|  19 | babi_nli/basic-coreference                                           | metaeval/babi_nli                         | basic-coreference                                   |                 | babi_nli                                     | Classification      |
+|  20 | babi_nli/three-arg-relations                                         | metaeval/babi_nli                         | three-arg-relations                                 |                 | babi_nli                                     | Classification      |
+|  21 | babi_nli/compound-coreference                                        | metaeval/babi_nli                         | compound-coreference                                |                 | babi_nli                                     | Classification      |
+|  22 | babi_nli/two-arg-relations                                           | metaeval/babi_nli                         | two-arg-relations                                   |                 | babi_nli                                     | Classification      |
+|  23 | babi_nli/conjunction                                                 | metaeval/babi_nli                         | conjunction                                         |                 | babi_nli                                     | Classification      |
+|  24 | babi_nli/path-finding                                                | metaeval/babi_nli                         | path-finding                                        |                 | babi_nli                                     | Classification      |
+|  25 | babi_nli/basic-induction                                             | metaeval/babi_nli                         | basic-induction                                     |                 | babi_nli                                     | Classification      |
+|  26 | babi_nli/single-supporting-fact                                      | metaeval/babi_nli                         | single-supporting-fact                              |                 | babi_nli                                     | Classification      |
+|  27 | babi_nli/simple-negation                                             | metaeval/babi_nli                         | simple-negation                                     |                 | babi_nli                                     | Classification      |
+|  28 | babi_nli/time-reasoning                                              | metaeval/babi_nli                         | time-reasoning                                      |                 | babi_nli                                     | Classification      |
+|  29 | babi_nli/yes-no-questions                                            | metaeval/babi_nli                         | yes-no-questions                                    |                 | babi_nli                                     | Classification      |
+|  30 | babi_nli/basic-deduction                                             | metaeval/babi_nli                         | basic-deduction                                     |                 | babi_nli                                     | Classification      |
+|  31 | babi_nli/positional-reasoning                                        | metaeval/babi_nli                         | positional-reasoning                                |                 | babi_nli                                     | Classification      |
+|  32 | babi_nli/three-supporting-facts                                      | metaeval/babi_nli                         | three-supporting-facts                              |                 | babi_nli                                     | Classification      |
+|  33 | babi_nli/lists-sets                                                  | metaeval/babi_nli                         | lists-sets                                          |                 | babi_nli                                     | Classification      |
 |  34 | sick/label                                                           | sick                                      |                                                     | label           | sick__label                                  | Classification      |
 |  35 | sick/relatedness                                                     | sick                                      |                                                     | relatedness     | sick__relatedness                            | Classification      |
 |  36 | sick/entailment_AB                                                   | sick                                      |                                                     | entailment_AB   | sick__entailment_AB                          | Classification      |
@@ -41,17 +41,17 @@
 |  38 | scitail/snli_format                                                  | scitail                                   | snli_format                                         |                 | scitail                                      | Classification      |
 |  39 | hans                                                                 | hans                                      |                                                     |                 | hans                                         | Classification      |
 |  40 | WANLI                                                                | alisawuffles/WANLI                        |                                                     |                 | wanli                                        | Classification      |
-|  41 | recast/recast_sentiment                                              | metaeval/recast                           | recast_sentiment                                    |                 | recast_nli                                   | Classification      |
+|  41 | recast/recast_verbnet                                                | metaeval/recast                           | recast_verbnet                                      |                 | recast_nli                                   | Classification      |
 |  42 | recast/recast_kg_relations                                           | metaeval/recast                           | recast_kg_relations                                 |                 | recast_nli                                   | Classification      |
-|  43 | recast/recast_verbnet                                                | metaeval/recast                           | recast_verbnet                                      |                 | recast_nli                                   | Classification      |
-|  44 | recast/recast_megaveridicality                                       | metaeval/recast                           | recast_megaveridicality                             |                 | recast_nli                                   | Classification      |
+|  43 | recast/recast_ner                                                    | metaeval/recast                           | recast_ner                                          |                 | recast_nli                                   | Classification      |
+|  44 | recast/recast_factuality                                             | metaeval/recast                           | recast_factuality                                   |                 | recast_nli                                   | Classification      |
 |  45 | recast/recast_puns                                                   | metaeval/recast                           | recast_puns                                         |                 | recast_nli                                   | Classification      |
-|  46 | recast/recast_verbcorner                                             | metaeval/recast                           | recast_verbcorner                                   |                 | recast_nli                                   | Classification      |
-|  47 | recast/recast_ner                                                    | metaeval/recast                           | recast_ner                                          |                 | recast_nli                                   | Classification      |
-|  48 | recast/recast_factuality                                             | metaeval/recast                           | recast_factuality                                   |                 | recast_nli                                   | Classification      |
-|  49 | probability_words_nli/reasoning_2hop                                 | sileod/probability_words_nli              | reasoning_2hop                                      |                 | probability_words_nli                        | Classification      |
+|  46 | recast/recast_megaveridicality                                       | metaeval/recast                           | recast_megaveridicality                             |                 | recast_nli                                   | Classification      |
+|  47 | recast/recast_sentiment                                              | metaeval/recast                           | recast_sentiment                                    |                 | recast_nli                                   | Classification      |
+|  48 | recast/recast_verbcorner                                             | metaeval/recast                           | recast_verbcorner                                   |                 | recast_nli                                   | Classification      |
+|  49 | probability_words_nli/usnli                                          | sileod/probability_words_nli              | usnli                                               |                 | probability_words_nli                        | Classification      |
 |  50 | probability_words_nli/reasoning_1hop                                 | sileod/probability_words_nli              | reasoning_1hop                                      |                 | probability_words_nli                        | Classification      |
-|  51 | probability_words_nli/usnli                                          | sileod/probability_words_nli              | usnli                                               |                 | probability_words_nli                        | Classification      |
+|  51 | probability_words_nli/reasoning_2hop                                 | sileod/probability_words_nli              | reasoning_2hop                                      |                 | probability_words_nli                        | Classification      |
 |  52 | nan-nli/joey234--nan-nli                                             | joey234/nan-nli                           | joey234--nan-nli                                    |                 | nan_nli                                      | Classification      |
 |  53 | nli_fever                                                            | pietrolesci/nli_fever                     |                                                     |                 | nli_fever                                    | Classification      |
 |  54 | breaking_nli                                                         | pietrolesci/breaking_nli                  |                                                     |                 | breaking_nli                                 | Classification      |
@@ -81,29 +81,29 @@
 |  78 | gen_debiased_nli/mnli_z_aug                                          | pietrolesci/gen_debiased_nli              |                                                     | mnli_z_aug      | gen_debiased_nli__mnli_z_aug                 | Classification      |
 |  79 | gen_debiased_nli/mnli_seq_z                                          | pietrolesci/gen_debiased_nli              |                                                     | mnli_seq_z      | gen_debiased_nli__mnli_seq_z                 | Classification      |
 |  80 | add_one_rte                                                          | pietrolesci/add_one_rte                   |                                                     |                 | add_one_rte                                  | Classification      |
-|  81 | imppres/presupposition_only_presupposition/presupposition            | metaeval/imppres                          | presupposition_only_presupposition                  | presupposition  | imppres__presupposition                      | Classification      |
-|  82 | imppres/presupposition_question_presupposition/presupposition        | metaeval/imppres                          | presupposition_question_presupposition              | presupposition  | imppres__presupposition                      | Classification      |
-|  83 | imppres/presupposition_possessed_definites_existence/presupposition  | metaeval/imppres                          | presupposition_possessed_definites_existence        | presupposition  | imppres__presupposition                      | Classification      |
-|  84 | imppres/presupposition_all_n_presupposition/presupposition           | metaeval/imppres                          | presupposition_all_n_presupposition                 | presupposition  | imppres__presupposition                      | Classification      |
+|  81 | imppres/presupposition_both_presupposition/presupposition            | metaeval/imppres                          | presupposition_both_presupposition                  | presupposition  | imppres__presupposition                      | Classification      |
+|  82 | imppres/presupposition_all_n_presupposition/presupposition           | metaeval/imppres                          | presupposition_all_n_presupposition                 | presupposition  | imppres__presupposition                      | Classification      |
+|  83 | imppres/presupposition_change_of_state/presupposition                | metaeval/imppres                          | presupposition_change_of_state                      | presupposition  | imppres__presupposition                      | Classification      |
+|  84 | imppres/presupposition_possessed_definites_uniqueness/presupposition | metaeval/imppres                          | presupposition_possessed_definites_uniqueness       | presupposition  | imppres__presupposition                      | Classification      |
 |  85 | imppres/presupposition_cleft_existence/presupposition                | metaeval/imppres                          | presupposition_cleft_existence                      | presupposition  | imppres__presupposition                      | Classification      |
 |  86 | imppres/presupposition_cleft_uniqueness/presupposition               | metaeval/imppres                          | presupposition_cleft_uniqueness                     | presupposition  | imppres__presupposition                      | Classification      |
-|  87 | imppres/presupposition_change_of_state/presupposition                | metaeval/imppres                          | presupposition_change_of_state                      | presupposition  | imppres__presupposition                      | Classification      |
-|  88 | imppres/presupposition_both_presupposition/presupposition            | metaeval/imppres                          | presupposition_both_presupposition                  | presupposition  | imppres__presupposition                      | Classification      |
-|  89 | imppres/presupposition_possessed_definites_uniqueness/presupposition | metaeval/imppres                          | presupposition_possessed_definites_uniqueness       | presupposition  | imppres__presupposition                      | Classification      |
-|  90 | imppres/implicature_connectives/prag                                 | metaeval/imppres                          | implicature_connectives                             | prag            | imppres__prag                                | Classification      |
-|  91 | imppres/implicature_gradable_verb/prag                               | metaeval/imppres                          | implicature_gradable_verb                           | prag            | imppres__prag                                | Classification      |
-|  92 | imppres/implicature_modals/prag                                      | metaeval/imppres                          | implicature_modals                                  | prag            | imppres__prag                                | Classification      |
-|  93 | imppres/implicature_quantifiers/prag                                 | metaeval/imppres                          | implicature_quantifiers                             | prag            | imppres__prag                                | Classification      |
-|  94 | imppres/implicature_numerals_2_3/prag                                | metaeval/imppres                          | implicature_numerals_2_3                            | prag            | imppres__prag                                | Classification      |
-|  95 | imppres/implicature_numerals_10_100/prag                             | metaeval/imppres                          | implicature_numerals_10_100                         | prag            | imppres__prag                                | Classification      |
-|  96 | imppres/implicature_gradable_adjective/prag                          | metaeval/imppres                          | implicature_gradable_adjective                      | prag            | imppres__prag                                | Classification      |
-|  97 | imppres/implicature_connectives/log                                  | metaeval/imppres                          | implicature_connectives                             | log             | imppres__log                                 | Classification      |
-|  98 | imppres/implicature_modals/log                                       | metaeval/imppres                          | implicature_modals                                  | log             | imppres__log                                 | Classification      |
-|  99 | imppres/implicature_numerals_2_3/log                                 | metaeval/imppres                          | implicature_numerals_2_3                            | log             | imppres__log                                 | Classification      |
+|  87 | imppres/presupposition_question_presupposition/presupposition        | metaeval/imppres                          | presupposition_question_presupposition              | presupposition  | imppres__presupposition                      | Classification      |
+|  88 | imppres/presupposition_only_presupposition/presupposition            | metaeval/imppres                          | presupposition_only_presupposition                  | presupposition  | imppres__presupposition                      | Classification      |
+|  89 | imppres/presupposition_possessed_definites_existence/presupposition  | metaeval/imppres                          | presupposition_possessed_definites_existence        | presupposition  | imppres__presupposition                      | Classification      |
+|  90 | imppres/implicature_gradable_verb/prag                               | metaeval/imppres                          | implicature_gradable_verb                           | prag            | imppres__prag                                | Classification      |
+|  91 | imppres/implicature_quantifiers/prag                                 | metaeval/imppres                          | implicature_quantifiers                             | prag            | imppres__prag                                | Classification      |
+|  92 | imppres/implicature_connectives/prag                                 | metaeval/imppres                          | implicature_connectives                             | prag            | imppres__prag                                | Classification      |
+|  93 | imppres/implicature_modals/prag                                      | metaeval/imppres                          | implicature_modals                                  | prag            | imppres__prag                                | Classification      |
+|  94 | imppres/implicature_numerals_10_100/prag                             | metaeval/imppres                          | implicature_numerals_10_100                         | prag            | imppres__prag                                | Classification      |
+|  95 | imppres/implicature_gradable_adjective/prag                          | metaeval/imppres                          | implicature_gradable_adjective                      | prag            | imppres__prag                                | Classification      |
+|  96 | imppres/implicature_numerals_2_3/prag                                | metaeval/imppres                          | implicature_numerals_2_3                            | prag            | imppres__prag                                | Classification      |
+|  97 | imppres/implicature_numerals_10_100/log                              | metaeval/imppres                          | implicature_numerals_10_100                         | log             | imppres__log                                 | Classification      |
+|  98 | imppres/implicature_connectives/log                                  | metaeval/imppres                          | implicature_connectives                             | log             | imppres__log                                 | Classification      |
+|  99 | imppres/implicature_modals/log                                       | metaeval/imppres                          | implicature_modals                                  | log             | imppres__log                                 | Classification      |
 | 100 | imppres/implicature_gradable_verb/log                                | metaeval/imppres                          | implicature_gradable_verb                           | log             | imppres__log                                 | Classification      |
 | 101 | imppres/implicature_gradable_adjective/log                           | metaeval/imppres                          | implicature_gradable_adjective                      | log             | imppres__log                                 | Classification      |
-| 102 | imppres/implicature_quantifiers/log                                  | metaeval/imppres                          | implicature_quantifiers                             | log             | imppres__log                                 | Classification      |
-| 103 | imppres/implicature_numerals_10_100/log                              | metaeval/imppres                          | implicature_numerals_10_100                         | log             | imppres__log                                 | Classification      |
+| 102 | imppres/implicature_numerals_2_3/log                                 | metaeval/imppres                          | implicature_numerals_2_3                            | log             | imppres__log                                 | Classification      |
+| 103 | imppres/implicature_quantifiers/log                                  | metaeval/imppres                          | implicature_quantifiers                             | log             | imppres__log                                 | Classification      |
 | 104 | glue_diagnostics/diagnostics                                         | pietrolesci/glue_diagnostics              |                                                     | diagnostics     | glue__diagnostics                            | Classification      |
 | 105 | hlgd                                                                 | hlgd                                      |                                                     |                 | hlgd                                         | Classification      |
 | 106 | paws/labeled_final                                                   | paws                                      | labeled_final                                       |                 | paws___labeled_final                         | Classification      |
@@ -116,128 +116,128 @@
 | 113 | model-written-evals                                                  | Anthropic/model-written-evals             |                                                     |                 | model_written_evals                          | MultipleChoice      |
 | 114 | truthful_qa/multiple_choice                                          | truthful_qa                               | multiple_choice                                     |                 | truthful_qa___multiple_choice                | MultipleChoice      |
 | 115 | fig-qa                                                               | nightingal3/fig-qa                        |                                                     |                 | fig_qa                                       | MultipleChoice      |
-| 116 | bigbench/undo_permutation                                            | tasksource/bigbench                       | undo_permutation                                    |                 | bigbench                                     | MultipleChoice      |
-| 117 | bigbench/gre_reading_comprehension                                   | tasksource/bigbench                       | gre_reading_comprehension                           |                 | bigbench                                     | MultipleChoice      |
-| 118 | bigbench/analogical_similarity                                       | tasksource/bigbench                       | analogical_similarity                               |                 | bigbench                                     | MultipleChoice      |
-| 119 | bigbench/identify_math_theorems                                      | tasksource/bigbench                       | identify_math_theorems                              |                 | bigbench                                     | MultipleChoice      |
-| 120 | bigbench/intent_recognition                                          | tasksource/bigbench                       | intent_recognition                                  |                 | bigbench                                     | MultipleChoice      |
-| 121 | bigbench/identify_odd_metaphor                                       | tasksource/bigbench                       | identify_odd_metaphor                               |                 | bigbench                                     | MultipleChoice      |
-| 122 | bigbench/logical_sequence                                            | tasksource/bigbench                       | logical_sequence                                    |                 | bigbench                                     | MultipleChoice      |
-| 123 | bigbench/checkmate_in_one                                            | tasksource/bigbench                       | checkmate_in_one                                    |                 | bigbench                                     | MultipleChoice      |
-| 124 | bigbench/english_proverbs                                            | tasksource/bigbench                       | english_proverbs                                    |                 | bigbench                                     | MultipleChoice      |
-| 125 | bigbench/real_or_fake_text                                           | tasksource/bigbench                       | real_or_fake_text                                   |                 | bigbench                                     | MultipleChoice      |
-| 126 | bigbench/phrase_relatedness                                          | tasksource/bigbench                       | phrase_relatedness                                  |                 | bigbench                                     | MultipleChoice      |
-| 127 | bigbench/empirical_judgments                                         | tasksource/bigbench                       | empirical_judgments                                 |                 | bigbench                                     | MultipleChoice      |
-| 128 | bigbench/timedial                                                    | tasksource/bigbench                       | timedial                                            |                 | bigbench                                     | MultipleChoice      |
-| 129 | bigbench/abstract_narrative_understanding                            | tasksource/bigbench                       | abstract_narrative_understanding                    |                 | bigbench                                     | MultipleChoice      |
-| 130 | bigbench/fact_checker                                                | tasksource/bigbench                       | fact_checker                                        |                 | bigbench                                     | MultipleChoice      |
-| 131 | bigbench/simple_ethical_questions                                    | tasksource/bigbench                       | simple_ethical_questions                            |                 | bigbench                                     | MultipleChoice      |
-| 132 | bigbench/temporal_sequences                                          | tasksource/bigbench                       | temporal_sequences                                  |                 | bigbench                                     | MultipleChoice      |
-| 133 | bigbench/logic_grid_puzzle                                           | tasksource/bigbench                       | logic_grid_puzzle                                   |                 | bigbench                                     | MultipleChoice      |
-| 134 | bigbench/cause_and_effect                                            | tasksource/bigbench                       | cause_and_effect                                    |                 | bigbench                                     | MultipleChoice      |
-| 135 | bigbench/sentence_ambiguity                                          | tasksource/bigbench                       | sentence_ambiguity                                  |                 | bigbench                                     | MultipleChoice      |
-| 136 | bigbench/understanding_fables                                        | tasksource/bigbench                       | understanding_fables                                |                 | bigbench                                     | MultipleChoice      |
-| 137 | bigbench/moral_permissibility                                        | tasksource/bigbench                       | moral_permissibility                                |                 | bigbench                                     | MultipleChoice      |
-| 138 | bigbench/international_phonetic_alphabet_nli                         | tasksource/bigbench                       | international_phonetic_alphabet_nli                 |                 | bigbench                                     | MultipleChoice      |
-| 139 | bigbench/misconceptions                                              | tasksource/bigbench                       | misconceptions                                      |                 | bigbench                                     | MultipleChoice      |
-| 140 | bigbench/movie_recommendation                                        | tasksource/bigbench                       | movie_recommendation                                |                 | bigbench                                     | MultipleChoice      |
-| 141 | bigbench/disambiguation_qa                                           | tasksource/bigbench                       | disambiguation_qa                                   |                 | bigbench                                     | MultipleChoice      |
-| 142 | bigbench/metaphor_understanding                                      | tasksource/bigbench                       | metaphor_understanding                              |                 | bigbench                                     | MultipleChoice      |
-| 143 | bigbench/logical_fallacy_detection                                   | tasksource/bigbench                       | logical_fallacy_detection                           |                 | bigbench                                     | MultipleChoice      |
-| 144 | bigbench/suicide_risk                                                | tasksource/bigbench                       | suicide_risk                                        |                 | bigbench                                     | MultipleChoice      |
-| 145 | bigbench/dark_humor_detection                                        | tasksource/bigbench                       | dark_humor_detection                                |                 | bigbench                                     | MultipleChoice      |
-| 146 | bigbench/conceptual_combinations                                     | tasksource/bigbench                       | conceptual_combinations                             |                 | bigbench                                     | MultipleChoice      |
-| 147 | bigbench/arithmetic                                                  | tasksource/bigbench                       | arithmetic                                          |                 | bigbench                                     | MultipleChoice      |
-| 148 | bigbench/nonsense_words_grammar                                      | tasksource/bigbench                       | nonsense_words_grammar                              |                 | bigbench                                     | MultipleChoice      |
-| 149 | bigbench/goal_step_wikihow                                           | tasksource/bigbench                       | goal_step_wikihow                                   |                 | bigbench                                     | MultipleChoice      |
-| 150 | bigbench/metaphor_boolean                                            | tasksource/bigbench                       | metaphor_boolean                                    |                 | bigbench                                     | MultipleChoice      |
-| 151 | bigbench/mnist_ascii                                                 | tasksource/bigbench                       | mnist_ascii                                         |                 | bigbench                                     | MultipleChoice      |
-| 152 | bigbench/irony_identification                                        | tasksource/bigbench                       | irony_identification                                |                 | bigbench                                     | MultipleChoice      |
-| 153 | bigbench/question_selection                                          | tasksource/bigbench                       | question_selection                                  |                 | bigbench                                     | MultipleChoice      |
-| 154 | bigbench/logical_deduction                                           | tasksource/bigbench                       | logical_deduction                                   |                 | bigbench                                     | MultipleChoice      |
-| 155 | bigbench/hindu_knowledge                                             | tasksource/bigbench                       | hindu_knowledge                                     |                 | bigbench                                     | MultipleChoice      |
-| 156 | bigbench/movie_dialog_same_or_different                              | tasksource/bigbench                       | movie_dialog_same_or_different                      |                 | bigbench                                     | MultipleChoice      |
-| 157 | bigbench/social_iqa                                                  | tasksource/bigbench                       | social_iqa                                          |                 | bigbench                                     | MultipleChoice      |
-| 158 | bigbench/strategyqa                                                  | tasksource/bigbench                       | strategyqa                                          |                 | bigbench                                     | MultipleChoice      |
-| 159 | bigbench/tracking_shuffled_objects                                   | tasksource/bigbench                       | tracking_shuffled_objects                           |                 | bigbench                                     | MultipleChoice      |
-| 160 | bigbench/discourse_marker_prediction                                 | tasksource/bigbench                       | discourse_marker_prediction                         |                 | bigbench                                     | MultipleChoice      |
-| 161 | bigbench/physical_intuition                                          | tasksource/bigbench                       | physical_intuition                                  |                 | bigbench                                     | MultipleChoice      |
-| 162 | bigbench/causal_judgment                                             | tasksource/bigbench                       | causal_judgment                                     |                 | bigbench                                     | MultipleChoice      |
-| 163 | bigbench/code_line_description                                       | tasksource/bigbench                       | code_line_description                               |                 | bigbench                                     | MultipleChoice      |
-| 164 | bigbench/implicatures                                                | tasksource/bigbench                       | implicatures                                        |                 | bigbench                                     | MultipleChoice      |
-| 165 | bigbench/crash_blossom                                               | tasksource/bigbench                       | crash_blossom                                       |                 | bigbench                                     | MultipleChoice      |
-| 166 | bigbench/known_unknowns                                              | tasksource/bigbench                       | known_unknowns                                      |                 | bigbench                                     | MultipleChoice      |
-| 167 | bigbench/entailed_polarity                                           | tasksource/bigbench                       | entailed_polarity                                   |                 | bigbench                                     | MultipleChoice      |
-| 168 | bigbench/novel_concepts                                              | tasksource/bigbench                       | novel_concepts                                      |                 | bigbench                                     | MultipleChoice      |
-| 169 | bigbench/dyck_languages                                              | tasksource/bigbench                       | dyck_languages                                      |                 | bigbench                                     | MultipleChoice      |
-| 170 | bigbench/ruin_names                                                  | tasksource/bigbench                       | ruin_names                                          |                 | bigbench                                     | MultipleChoice      |
-| 171 | bigbench/figure_of_speech_detection                                  | tasksource/bigbench                       | figure_of_speech_detection                          |                 | bigbench                                     | MultipleChoice      |
-| 172 | bigbench/vitaminc_fact_verification                                  | tasksource/bigbench                       | vitaminc_fact_verification                          |                 | bigbench                                     | MultipleChoice      |
-| 173 | bigbench/emojis_emotion_prediction                                   | tasksource/bigbench                       | emojis_emotion_prediction                           |                 | bigbench                                     | MultipleChoice      |
-| 174 | bigbench/odd_one_out                                                 | tasksource/bigbench                       | odd_one_out                                         |                 | bigbench                                     | MultipleChoice      |
-| 175 | bigbench/play_dialog_same_or_different                               | tasksource/bigbench                       | play_dialog_same_or_different                       |                 | bigbench                                     | MultipleChoice      |
-| 176 | bigbench/formal_fallacies_syllogisms_negation                        | tasksource/bigbench                       | formal_fallacies_syllogisms_negation                |                 | bigbench                                     | MultipleChoice      |
-| 177 | bigbench/hhh_alignment                                               | tasksource/bigbench                       | hhh_alignment                                       |                 | bigbench                                     | MultipleChoice      |
-| 178 | bigbench/salient_translation_error_detection                         | tasksource/bigbench                       | salient_translation_error_detection                 |                 | bigbench                                     | MultipleChoice      |
-| 179 | bigbench/riddle_sense                                                | tasksource/bigbench                       | riddle_sense                                        |                 | bigbench                                     | MultipleChoice      |
-| 180 | bigbench/elementary_math_qa                                          | tasksource/bigbench                       | elementary_math_qa                                  |                 | bigbench                                     | MultipleChoice      |
-| 181 | bigbench/cifar10_classification                                      | tasksource/bigbench                       | cifar10_classification                              |                 | bigbench                                     | MultipleChoice      |
-| 182 | bigbench/geometric_shapes                                            | tasksource/bigbench                       | geometric_shapes                                    |                 | bigbench                                     | MultipleChoice      |
-| 183 | bigbench/human_organs_senses                                         | tasksource/bigbench                       | human_organs_senses                                 |                 | bigbench                                     | MultipleChoice      |
-| 184 | bigbench/crass_ai                                                    | tasksource/bigbench                       | crass_ai                                            |                 | bigbench                                     | MultipleChoice      |
-| 185 | bigbench/date_understanding                                          | tasksource/bigbench                       | date_understanding                                  |                 | bigbench                                     | MultipleChoice      |
-| 186 | bigbench/analytic_entailment                                         | tasksource/bigbench                       | analytic_entailment                                 |                 | bigbench                                     | MultipleChoice      |
-| 187 | bigbench/similarities_abstraction                                    | tasksource/bigbench                       | similarities_abstraction                            |                 | bigbench                                     | MultipleChoice      |
-| 188 | bigbench/winowhy                                                     | tasksource/bigbench                       | winowhy                                             |                 | bigbench                                     | MultipleChoice      |
-| 189 | bigbench/mathematical_induction                                      | tasksource/bigbench                       | mathematical_induction                              |                 | bigbench                                     | MultipleChoice      |
-| 190 | bigbench/logical_args                                                | tasksource/bigbench                       | logical_args                                        |                 | bigbench                                     | MultipleChoice      |
-| 191 | bigbench/snarks                                                      | tasksource/bigbench                       | snarks                                              |                 | bigbench                                     | MultipleChoice      |
-| 192 | bigbench/anachronisms                                                | tasksource/bigbench                       | anachronisms                                        |                 | bigbench                                     | MultipleChoice      |
-| 193 | bigbench/epistemic_reasoning                                         | tasksource/bigbench                       | epistemic_reasoning                                 |                 | bigbench                                     | MultipleChoice      |
-| 194 | bigbench/reasoning_about_colored_objects                             | tasksource/bigbench                       | reasoning_about_colored_objects                     |                 | bigbench                                     | MultipleChoice      |
-| 195 | bigbench/presuppositions_as_nli                                      | tasksource/bigbench                       | presuppositions_as_nli                              |                 | bigbench                                     | MultipleChoice      |
-| 196 | bigbench/contextual_parametric_knowledge_conflicts                   | tasksource/bigbench                       | contextual_parametric_knowledge_conflicts           |                 | bigbench                                     | MultipleChoice      |
-| 197 | bigbench/bbq_lite_json                                               | tasksource/bigbench                       | bbq_lite_json                                       |                 | bigbench                                     | MultipleChoice      |
-| 198 | bigbench/fantasy_reasoning                                           | tasksource/bigbench                       | fantasy_reasoning                                   |                 | bigbench                                     | MultipleChoice      |
-| 199 | bigbench/color                                                       | tasksource/bigbench                       | color                                               |                 | bigbench                                     | MultipleChoice      |
-| 200 | bigbench/penguins_in_a_table                                         | tasksource/bigbench                       | penguins_in_a_table                                 |                 | bigbench                                     | MultipleChoice      |
-| 201 | bigbench/evaluating_information_essentiality                         | tasksource/bigbench                       | evaluating_information_essentiality                 |                 | bigbench                                     | MultipleChoice      |
-| 202 | bigbench/cs_algorithms                                               | tasksource/bigbench                       | cs_algorithms                                       |                 | bigbench                                     | MultipleChoice      |
-| 203 | bigbench/physics                                                     | tasksource/bigbench                       | physics                                             |                 | bigbench                                     | MultipleChoice      |
-| 204 | bigbench/navigate                                                    | tasksource/bigbench                       | navigate                                            |                 | bigbench                                     | MultipleChoice      |
-| 205 | bigbench/key_value_maps                                              | tasksource/bigbench                       | key_value_maps                                      |                 | bigbench                                     | MultipleChoice      |
-| 206 | bigbench/strange_stories                                             | tasksource/bigbench                       | strange_stories                                     |                 | bigbench                                     | MultipleChoice      |
-| 207 | bigbench/sports_understanding                                        | tasksource/bigbench                       | sports_understanding                                |                 | bigbench                                     | MultipleChoice      |
-| 208 | bigbench/unit_interpretation                                         | tasksource/bigbench                       | unit_interpretation                                 |                 | bigbench                                     | MultipleChoice      |
-| 209 | bigbench/general_knowledge                                           | tasksource/bigbench                       | general_knowledge                                   |                 | bigbench                                     | MultipleChoice      |
-| 210 | bigbench/authorship_verification                                     | tasksource/bigbench                       | authorship_verification                             |                 | bigbench                                     | MultipleChoice      |
-| 211 | bigbench/emoji_movie                                                 | tasksource/bigbench                       | emoji_movie                                         |                 | bigbench                                     | MultipleChoice      |
-| 212 | bigbench/symbol_interpretation                                       | tasksource/bigbench                       | symbol_interpretation                               |                 | bigbench                                     | MultipleChoice      |
-| 213 | bigbench/hyperbaton                                                  | tasksource/bigbench                       | hyperbaton                                          |                 | bigbench                                     | MultipleChoice      |
-| 214 | bigbench/implicit_relations                                          | tasksource/bigbench                       | implicit_relations                                  |                 | bigbench                                     | MultipleChoice      |
-| 215 | bigbench/social_support                                              | tasksource/bigbench                       | social_support                                      |                 | bigbench                                     | MultipleChoice      |
-| 216 | blimp/principle_A_c_command                                          | blimp                                     | principle_A_c_command                               |                 | blimp_hard                                   | MultipleChoice      |
-| 217 | blimp/wh_questions_subject_gap_long_distance                         | blimp                                     | wh_questions_subject_gap_long_distance              |                 | blimp_hard                                   | MultipleChoice      |
-| 218 | blimp/drop_argument                                                  | blimp                                     | drop_argument                                       |                 | blimp_hard                                   | MultipleChoice      |
-| 219 | blimp/coordinate_structure_constraint_object_extraction              | blimp                                     | coordinate_structure_constraint_object_extraction   |                 | blimp_hard                                   | MultipleChoice      |
-| 220 | blimp/npi_present_2                                                  | blimp                                     | npi_present_2                                       |                 | blimp_hard                                   | MultipleChoice      |
-| 221 | blimp/matrix_question_npi_licensor_present                           | blimp                                     | matrix_question_npi_licensor_present                |                 | blimp_hard                                   | MultipleChoice      |
-| 222 | blimp/superlative_quantifiers_1                                      | blimp                                     | superlative_quantifiers_1                           |                 | blimp_hard                                   | MultipleChoice      |
-| 223 | blimp/left_branch_island_echo_question                               | blimp                                     | left_branch_island_echo_question                    |                 | blimp_hard                                   | MultipleChoice      |
-| 224 | blimp/wh_vs_that_with_gap_long_distance                              | blimp                                     | wh_vs_that_with_gap_long_distance                   |                 | blimp_hard                                   | MultipleChoice      |
-| 225 | blimp/tough_vs_raising_1                                             | blimp                                     | tough_vs_raising_1                                  |                 | blimp_hard                                   | MultipleChoice      |
-| 226 | blimp/npi_present_1                                                  | blimp                                     | npi_present_1                                       |                 | blimp_hard                                   | MultipleChoice      |
-| 227 | blimp/coordinate_structure_constraint_complex_left_branch            | blimp                                     | coordinate_structure_constraint_complex_left_branch |                 | blimp_hard                                   | MultipleChoice      |
-| 228 | blimp/principle_A_domain_2                                           | blimp                                     | principle_A_domain_2                                |                 | blimp_hard                                   | MultipleChoice      |
-| 229 | blimp/wh_vs_that_with_gap                                            | blimp                                     | wh_vs_that_with_gap                                 |                 | blimp_hard                                   | MultipleChoice      |
-| 230 | blimp/sentential_subject_island                                      | blimp                                     | sentential_subject_island                           |                 | blimp_hard                                   | MultipleChoice      |
-| 231 | blimp/wh_questions_object_gap                                        | blimp                                     | wh_questions_object_gap                             |                 | blimp_hard                                   | MultipleChoice      |
-| 232 | blimp/sentential_negation_npi_scope                                  | blimp                                     | sentential_negation_npi_scope                       |                 | blimp_hard                                   | MultipleChoice      |
-| 233 | blimp/animate_subject_passive                                        | blimp                                     | animate_subject_passive                             |                 | blimp_hard                                   | MultipleChoice      |
-| 234 | blimp/existential_there_quantifiers_2                                | blimp                                     | existential_there_quantifiers_2                     |                 | blimp_hard                                   | MultipleChoice      |
-| 235 | blimp/inchoative                                                     | blimp                                     | inchoative                                          |                 | blimp_hard                                   | MultipleChoice      |
-| 236 | blimp/principle_A_reconstruction                                     | blimp                                     | principle_A_reconstruction                          |                 | blimp_hard                                   | MultipleChoice      |
-| 237 | blimp/complex_NP_island                                              | blimp                                     | complex_NP_island                                   |                 | blimp_hard                                   | MultipleChoice      |
+| 116 | bigbench/authorship_verification                                     | tasksource/bigbench                       | authorship_verification                             |                 | bigbench                                     | MultipleChoice      |
+| 117 | bigbench/tracking_shuffled_objects                                   | tasksource/bigbench                       | tracking_shuffled_objects                           |                 | bigbench                                     | MultipleChoice      |
+| 118 | bigbench/fantasy_reasoning                                           | tasksource/bigbench                       | fantasy_reasoning                                   |                 | bigbench                                     | MultipleChoice      |
+| 119 | bigbench/ruin_names                                                  | tasksource/bigbench                       | ruin_names                                          |                 | bigbench                                     | MultipleChoice      |
+| 120 | bigbench/salient_translation_error_detection                         | tasksource/bigbench                       | salient_translation_error_detection                 |                 | bigbench                                     | MultipleChoice      |
+| 121 | bigbench/key_value_maps                                              | tasksource/bigbench                       | key_value_maps                                      |                 | bigbench                                     | MultipleChoice      |
+| 122 | bigbench/novel_concepts                                              | tasksource/bigbench                       | novel_concepts                                      |                 | bigbench                                     | MultipleChoice      |
+| 123 | bigbench/winowhy                                                     | tasksource/bigbench                       | winowhy                                             |                 | bigbench                                     | MultipleChoice      |
+| 124 | bigbench/mnist_ascii                                                 | tasksource/bigbench                       | mnist_ascii                                         |                 | bigbench                                     | MultipleChoice      |
+| 125 | bigbench/hindu_knowledge                                             | tasksource/bigbench                       | hindu_knowledge                                     |                 | bigbench                                     | MultipleChoice      |
+| 126 | bigbench/contextual_parametric_knowledge_conflicts                   | tasksource/bigbench                       | contextual_parametric_knowledge_conflicts           |                 | bigbench                                     | MultipleChoice      |
+| 127 | bigbench/bbq_lite_json                                               | tasksource/bigbench                       | bbq_lite_json                                       |                 | bigbench                                     | MultipleChoice      |
+| 128 | bigbench/strange_stories                                             | tasksource/bigbench                       | strange_stories                                     |                 | bigbench                                     | MultipleChoice      |
+| 129 | bigbench/hyperbaton                                                  | tasksource/bigbench                       | hyperbaton                                          |                 | bigbench                                     | MultipleChoice      |
+| 130 | bigbench/strategyqa                                                  | tasksource/bigbench                       | strategyqa                                          |                 | bigbench                                     | MultipleChoice      |
+| 131 | bigbench/cause_and_effect                                            | tasksource/bigbench                       | cause_and_effect                                    |                 | bigbench                                     | MultipleChoice      |
+| 132 | bigbench/empirical_judgments                                         | tasksource/bigbench                       | empirical_judgments                                 |                 | bigbench                                     | MultipleChoice      |
+| 133 | bigbench/understanding_fables                                        | tasksource/bigbench                       | understanding_fables                                |                 | bigbench                                     | MultipleChoice      |
+| 134 | bigbench/timedial                                                    | tasksource/bigbench                       | timedial                                            |                 | bigbench                                     | MultipleChoice      |
+| 135 | bigbench/metaphor_boolean                                            | tasksource/bigbench                       | metaphor_boolean                                    |                 | bigbench                                     | MultipleChoice      |
+| 136 | bigbench/formal_fallacies_syllogisms_negation                        | tasksource/bigbench                       | formal_fallacies_syllogisms_negation                |                 | bigbench                                     | MultipleChoice      |
+| 137 | bigbench/question_selection                                          | tasksource/bigbench                       | question_selection                                  |                 | bigbench                                     | MultipleChoice      |
+| 138 | bigbench/human_organs_senses                                         | tasksource/bigbench                       | human_organs_senses                                 |                 | bigbench                                     | MultipleChoice      |
+| 139 | bigbench/date_understanding                                          | tasksource/bigbench                       | date_understanding                                  |                 | bigbench                                     | MultipleChoice      |
+| 140 | bigbench/undo_permutation                                            | tasksource/bigbench                       | undo_permutation                                    |                 | bigbench                                     | MultipleChoice      |
+| 141 | bigbench/anachronisms                                                | tasksource/bigbench                       | anachronisms                                        |                 | bigbench                                     | MultipleChoice      |
+| 142 | bigbench/penguins_in_a_table                                         | tasksource/bigbench                       | penguins_in_a_table                                 |                 | bigbench                                     | MultipleChoice      |
+| 143 | bigbench/social_iqa                                                  | tasksource/bigbench                       | social_iqa                                          |                 | bigbench                                     | MultipleChoice      |
+| 144 | bigbench/similarities_abstraction                                    | tasksource/bigbench                       | similarities_abstraction                            |                 | bigbench                                     | MultipleChoice      |
+| 145 | bigbench/hhh_alignment                                               | tasksource/bigbench                       | hhh_alignment                                       |                 | bigbench                                     | MultipleChoice      |
+| 146 | bigbench/international_phonetic_alphabet_nli                         | tasksource/bigbench                       | international_phonetic_alphabet_nli                 |                 | bigbench                                     | MultipleChoice      |
+| 147 | bigbench/nonsense_words_grammar                                      | tasksource/bigbench                       | nonsense_words_grammar                              |                 | bigbench                                     | MultipleChoice      |
+| 148 | bigbench/causal_judgment                                             | tasksource/bigbench                       | causal_judgment                                     |                 | bigbench                                     | MultipleChoice      |
+| 149 | bigbench/logical_fallacy_detection                                   | tasksource/bigbench                       | logical_fallacy_detection                           |                 | bigbench                                     | MultipleChoice      |
+| 150 | bigbench/unit_interpretation                                         | tasksource/bigbench                       | unit_interpretation                                 |                 | bigbench                                     | MultipleChoice      |
+| 151 | bigbench/moral_permissibility                                        | tasksource/bigbench                       | moral_permissibility                                |                 | bigbench                                     | MultipleChoice      |
+| 152 | bigbench/reasoning_about_colored_objects                             | tasksource/bigbench                       | reasoning_about_colored_objects                     |                 | bigbench                                     | MultipleChoice      |
+| 153 | bigbench/identify_math_theorems                                      | tasksource/bigbench                       | identify_math_theorems                              |                 | bigbench                                     | MultipleChoice      |
+| 154 | bigbench/identify_odd_metaphor                                       | tasksource/bigbench                       | identify_odd_metaphor                               |                 | bigbench                                     | MultipleChoice      |
+| 155 | bigbench/metaphor_understanding                                      | tasksource/bigbench                       | metaphor_understanding                              |                 | bigbench                                     | MultipleChoice      |
+| 156 | bigbench/odd_one_out                                                 | tasksource/bigbench                       | odd_one_out                                         |                 | bigbench                                     | MultipleChoice      |
+| 157 | bigbench/checkmate_in_one                                            | tasksource/bigbench                       | checkmate_in_one                                    |                 | bigbench                                     | MultipleChoice      |
+| 158 | bigbench/suicide_risk                                                | tasksource/bigbench                       | suicide_risk                                        |                 | bigbench                                     | MultipleChoice      |
+| 159 | bigbench/snarks                                                      | tasksource/bigbench                       | snarks                                              |                 | bigbench                                     | MultipleChoice      |
+| 160 | bigbench/logical_args                                                | tasksource/bigbench                       | logical_args                                        |                 | bigbench                                     | MultipleChoice      |
+| 161 | bigbench/dark_humor_detection                                        | tasksource/bigbench                       | dark_humor_detection                                |                 | bigbench                                     | MultipleChoice      |
+| 162 | bigbench/english_proverbs                                            | tasksource/bigbench                       | english_proverbs                                    |                 | bigbench                                     | MultipleChoice      |
+| 163 | bigbench/known_unknowns                                              | tasksource/bigbench                       | known_unknowns                                      |                 | bigbench                                     | MultipleChoice      |
+| 164 | bigbench/physical_intuition                                          | tasksource/bigbench                       | physical_intuition                                  |                 | bigbench                                     | MultipleChoice      |
+| 165 | bigbench/figure_of_speech_detection                                  | tasksource/bigbench                       | figure_of_speech_detection                          |                 | bigbench                                     | MultipleChoice      |
+| 166 | bigbench/simple_ethical_questions                                    | tasksource/bigbench                       | simple_ethical_questions                            |                 | bigbench                                     | MultipleChoice      |
+| 167 | bigbench/symbol_interpretation                                       | tasksource/bigbench                       | symbol_interpretation                               |                 | bigbench                                     | MultipleChoice      |
+| 168 | bigbench/gre_reading_comprehension                                   | tasksource/bigbench                       | gre_reading_comprehension                           |                 | bigbench                                     | MultipleChoice      |
+| 169 | bigbench/irony_identification                                        | tasksource/bigbench                       | irony_identification                                |                 | bigbench                                     | MultipleChoice      |
+| 170 | bigbench/sentence_ambiguity                                          | tasksource/bigbench                       | sentence_ambiguity                                  |                 | bigbench                                     | MultipleChoice      |
+| 171 | bigbench/social_support                                              | tasksource/bigbench                       | social_support                                      |                 | bigbench                                     | MultipleChoice      |
+| 172 | bigbench/elementary_math_qa                                          | tasksource/bigbench                       | elementary_math_qa                                  |                 | bigbench                                     | MultipleChoice      |
+| 173 | bigbench/logic_grid_puzzle                                           | tasksource/bigbench                       | logic_grid_puzzle                                   |                 | bigbench                                     | MultipleChoice      |
+| 174 | bigbench/logical_sequence                                            | tasksource/bigbench                       | logical_sequence                                    |                 | bigbench                                     | MultipleChoice      |
+| 175 | bigbench/disambiguation_qa                                           | tasksource/bigbench                       | disambiguation_qa                                   |                 | bigbench                                     | MultipleChoice      |
+| 176 | bigbench/mathematical_induction                                      | tasksource/bigbench                       | mathematical_induction                              |                 | bigbench                                     | MultipleChoice      |
+| 177 | bigbench/crash_blossom                                               | tasksource/bigbench                       | crash_blossom                                       |                 | bigbench                                     | MultipleChoice      |
+| 178 | bigbench/abstract_narrative_understanding                            | tasksource/bigbench                       | abstract_narrative_understanding                    |                 | bigbench                                     | MultipleChoice      |
+| 179 | bigbench/movie_recommendation                                        | tasksource/bigbench                       | movie_recommendation                                |                 | bigbench                                     | MultipleChoice      |
+| 180 | bigbench/conceptual_combinations                                     | tasksource/bigbench                       | conceptual_combinations                             |                 | bigbench                                     | MultipleChoice      |
+| 181 | bigbench/implicatures                                                | tasksource/bigbench                       | implicatures                                        |                 | bigbench                                     | MultipleChoice      |
+| 182 | bigbench/presuppositions_as_nli                                      | tasksource/bigbench                       | presuppositions_as_nli                              |                 | bigbench                                     | MultipleChoice      |
+| 183 | bigbench/cifar10_classification                                      | tasksource/bigbench                       | cifar10_classification                              |                 | bigbench                                     | MultipleChoice      |
+| 184 | bigbench/intent_recognition                                          | tasksource/bigbench                       | intent_recognition                                  |                 | bigbench                                     | MultipleChoice      |
+| 185 | bigbench/play_dialog_same_or_different                               | tasksource/bigbench                       | play_dialog_same_or_different                       |                 | bigbench                                     | MultipleChoice      |
+| 186 | bigbench/dyck_languages                                              | tasksource/bigbench                       | dyck_languages                                      |                 | bigbench                                     | MultipleChoice      |
+| 187 | bigbench/entailed_polarity                                           | tasksource/bigbench                       | entailed_polarity                                   |                 | bigbench                                     | MultipleChoice      |
+| 188 | bigbench/real_or_fake_text                                           | tasksource/bigbench                       | real_or_fake_text                                   |                 | bigbench                                     | MultipleChoice      |
+| 189 | bigbench/misconceptions                                              | tasksource/bigbench                       | misconceptions                                      |                 | bigbench                                     | MultipleChoice      |
+| 190 | bigbench/vitaminc_fact_verification                                  | tasksource/bigbench                       | vitaminc_fact_verification                          |                 | bigbench                                     | MultipleChoice      |
+| 191 | bigbench/cs_algorithms                                               | tasksource/bigbench                       | cs_algorithms                                       |                 | bigbench                                     | MultipleChoice      |
+| 192 | bigbench/color                                                       | tasksource/bigbench                       | color                                               |                 | bigbench                                     | MultipleChoice      |
+| 193 | bigbench/emoji_movie                                                 | tasksource/bigbench                       | emoji_movie                                         |                 | bigbench                                     | MultipleChoice      |
+| 194 | bigbench/analogical_similarity                                       | tasksource/bigbench                       | analogical_similarity                               |                 | bigbench                                     | MultipleChoice      |
+| 195 | bigbench/sports_understanding                                        | tasksource/bigbench                       | sports_understanding                                |                 | bigbench                                     | MultipleChoice      |
+| 196 | bigbench/physics                                                     | tasksource/bigbench                       | physics                                             |                 | bigbench                                     | MultipleChoice      |
+| 197 | bigbench/fact_checker                                                | tasksource/bigbench                       | fact_checker                                        |                 | bigbench                                     | MultipleChoice      |
+| 198 | bigbench/epistemic_reasoning                                         | tasksource/bigbench                       | epistemic_reasoning                                 |                 | bigbench                                     | MultipleChoice      |
+| 199 | bigbench/phrase_relatedness                                          | tasksource/bigbench                       | phrase_relatedness                                  |                 | bigbench                                     | MultipleChoice      |
+| 200 | bigbench/evaluating_information_essentiality                         | tasksource/bigbench                       | evaluating_information_essentiality                 |                 | bigbench                                     | MultipleChoice      |
+| 201 | bigbench/arithmetic                                                  | tasksource/bigbench                       | arithmetic                                          |                 | bigbench                                     | MultipleChoice      |
+| 202 | bigbench/code_line_description                                       | tasksource/bigbench                       | code_line_description                               |                 | bigbench                                     | MultipleChoice      |
+| 203 | bigbench/logical_deduction                                           | tasksource/bigbench                       | logical_deduction                                   |                 | bigbench                                     | MultipleChoice      |
+| 204 | bigbench/crass_ai                                                    | tasksource/bigbench                       | crass_ai                                            |                 | bigbench                                     | MultipleChoice      |
+| 205 | bigbench/temporal_sequences                                          | tasksource/bigbench                       | temporal_sequences                                  |                 | bigbench                                     | MultipleChoice      |
+| 206 | bigbench/general_knowledge                                           | tasksource/bigbench                       | general_knowledge                                   |                 | bigbench                                     | MultipleChoice      |
+| 207 | bigbench/implicit_relations                                          | tasksource/bigbench                       | implicit_relations                                  |                 | bigbench                                     | MultipleChoice      |
+| 208 | bigbench/navigate                                                    | tasksource/bigbench                       | navigate                                            |                 | bigbench                                     | MultipleChoice      |
+| 209 | bigbench/discourse_marker_prediction                                 | tasksource/bigbench                       | discourse_marker_prediction                         |                 | bigbench                                     | MultipleChoice      |
+| 210 | bigbench/goal_step_wikihow                                           | tasksource/bigbench                       | goal_step_wikihow                                   |                 | bigbench                                     | MultipleChoice      |
+| 211 | bigbench/emojis_emotion_prediction                                   | tasksource/bigbench                       | emojis_emotion_prediction                           |                 | bigbench                                     | MultipleChoice      |
+| 212 | bigbench/geometric_shapes                                            | tasksource/bigbench                       | geometric_shapes                                    |                 | bigbench                                     | MultipleChoice      |
+| 213 | bigbench/movie_dialog_same_or_different                              | tasksource/bigbench                       | movie_dialog_same_or_different                      |                 | bigbench                                     | MultipleChoice      |
+| 214 | bigbench/analytic_entailment                                         | tasksource/bigbench                       | analytic_entailment                                 |                 | bigbench                                     | MultipleChoice      |
+| 215 | bigbench/riddle_sense                                                | tasksource/bigbench                       | riddle_sense                                        |                 | bigbench                                     | MultipleChoice      |
+| 216 | blimp/coordinate_structure_constraint_complex_left_branch            | blimp                                     | coordinate_structure_constraint_complex_left_branch |                 | blimp_hard                                   | MultipleChoice      |
+| 217 | blimp/inchoative                                                     | blimp                                     | inchoative                                          |                 | blimp_hard                                   | MultipleChoice      |
+| 218 | blimp/wh_vs_that_with_gap                                            | blimp                                     | wh_vs_that_with_gap                                 |                 | blimp_hard                                   | MultipleChoice      |
+| 219 | blimp/animate_subject_passive                                        | blimp                                     | animate_subject_passive                             |                 | blimp_hard                                   | MultipleChoice      |
+| 220 | blimp/coordinate_structure_constraint_object_extraction              | blimp                                     | coordinate_structure_constraint_object_extraction   |                 | blimp_hard                                   | MultipleChoice      |
+| 221 | blimp/existential_there_quantifiers_2                                | blimp                                     | existential_there_quantifiers_2                     |                 | blimp_hard                                   | MultipleChoice      |
+| 222 | blimp/npi_present_2                                                  | blimp                                     | npi_present_2                                       |                 | blimp_hard                                   | MultipleChoice      |
+| 223 | blimp/wh_vs_that_with_gap_long_distance                              | blimp                                     | wh_vs_that_with_gap_long_distance                   |                 | blimp_hard                                   | MultipleChoice      |
+| 224 | blimp/wh_questions_subject_gap_long_distance                         | blimp                                     | wh_questions_subject_gap_long_distance              |                 | blimp_hard                                   | MultipleChoice      |
+| 225 | blimp/drop_argument                                                  | blimp                                     | drop_argument                                       |                 | blimp_hard                                   | MultipleChoice      |
+| 226 | blimp/sentential_subject_island                                      | blimp                                     | sentential_subject_island                           |                 | blimp_hard                                   | MultipleChoice      |
+| 227 | blimp/matrix_question_npi_licensor_present                           | blimp                                     | matrix_question_npi_licensor_present                |                 | blimp_hard                                   | MultipleChoice      |
+| 228 | blimp/principle_A_c_command                                          | blimp                                     | principle_A_c_command                               |                 | blimp_hard                                   | MultipleChoice      |
+| 229 | blimp/complex_NP_island                                              | blimp                                     | complex_NP_island                                   |                 | blimp_hard                                   | MultipleChoice      |
+| 230 | blimp/superlative_quantifiers_1                                      | blimp                                     | superlative_quantifiers_1                           |                 | blimp_hard                                   | MultipleChoice      |
+| 231 | blimp/principle_A_domain_2                                           | blimp                                     | principle_A_domain_2                                |                 | blimp_hard                                   | MultipleChoice      |
+| 232 | blimp/principle_A_reconstruction                                     | blimp                                     | principle_A_reconstruction                          |                 | blimp_hard                                   | MultipleChoice      |
+| 233 | blimp/npi_present_1                                                  | blimp                                     | npi_present_1                                       |                 | blimp_hard                                   | MultipleChoice      |
+| 234 | blimp/tough_vs_raising_1                                             | blimp                                     | tough_vs_raising_1                                  |                 | blimp_hard                                   | MultipleChoice      |
+| 235 | blimp/wh_questions_object_gap                                        | blimp                                     | wh_questions_object_gap                             |                 | blimp_hard                                   | MultipleChoice      |
+| 236 | blimp/sentential_negation_npi_scope                                  | blimp                                     | sentential_negation_npi_scope                       |                 | blimp_hard                                   | MultipleChoice      |
+| 237 | blimp/left_branch_island_echo_question                               | blimp                                     | left_branch_island_echo_question                    |                 | blimp_hard                                   | MultipleChoice      |
 | 238 | cos_e/v1.0                                                           | cos_e                                     | v1.0                                                |                 | cos_e                                        | MultipleChoice      |
 | 239 | cosmos_qa                                                            | cosmos_qa                                 |                                                     |                 | cosmos_qa                                    | MultipleChoice      |
 | 240 | dream                                                                | dream                                     |                                                     |                 | dream                                        | MultipleChoice      |
@@ -256,67 +256,67 @@
 | 253 | balanced-copa                                                        | pkavumba/balanced-copa                    |                                                     |                 | balanced_copa                                | MultipleChoice      |
 | 254 | e-CARE                                                               | 12ml/e-CARE                               |                                                     |                 | e_care                                       | MultipleChoice      |
 | 255 | art                                                                  | art                                       |                                                     |                 | art                                          | MultipleChoice      |
-| 256 | mmlu/high_school_european_history                                    | tasksource/mmlu                           | high_school_european_history                        |                 | mmlu                                         | MultipleChoice      |
-| 257 | mmlu/jurisprudence                                                   | tasksource/mmlu                           | jurisprudence                                       |                 | mmlu                                         | MultipleChoice      |
-| 258 | mmlu/logical_fallacies                                               | tasksource/mmlu                           | logical_fallacies                                   |                 | mmlu                                         | MultipleChoice      |
-| 259 | mmlu/machine_learning                                                | tasksource/mmlu                           | machine_learning                                    |                 | mmlu                                         | MultipleChoice      |
-| 260 | mmlu/high_school_physics                                             | tasksource/mmlu                           | high_school_physics                                 |                 | mmlu                                         | MultipleChoice      |
-| 261 | mmlu/high_school_psychology                                          | tasksource/mmlu                           | high_school_psychology                              |                 | mmlu                                         | MultipleChoice      |
-| 262 | mmlu/high_school_statistics                                          | tasksource/mmlu                           | high_school_statistics                              |                 | mmlu                                         | MultipleChoice      |
-| 263 | mmlu/abstract_algebra                                                | tasksource/mmlu                           | abstract_algebra                                    |                 | mmlu                                         | MultipleChoice      |
-| 264 | mmlu/college_mathematics                                             | tasksource/mmlu                           | college_mathematics                                 |                 | mmlu                                         | MultipleChoice      |
-| 265 | mmlu/management                                                      | tasksource/mmlu                           | management                                          |                 | mmlu                                         | MultipleChoice      |
-| 266 | mmlu/marketing                                                       | tasksource/mmlu                           | marketing                                           |                 | mmlu                                         | MultipleChoice      |
-| 267 | mmlu/medical_genetics                                                | tasksource/mmlu                           | medical_genetics                                    |                 | mmlu                                         | MultipleChoice      |
-| 268 | mmlu/miscellaneous                                                   | tasksource/mmlu                           | miscellaneous                                       |                 | mmlu                                         | MultipleChoice      |
-| 269 | mmlu/high_school_microeconomics                                      | tasksource/mmlu                           | high_school_microeconomics                          |                 | mmlu                                         | MultipleChoice      |
-| 270 | mmlu/high_school_mathematics                                         | tasksource/mmlu                           | high_school_mathematics                             |                 | mmlu                                         | MultipleChoice      |
-| 271 | mmlu/business_ethics                                                 | tasksource/mmlu                           | business_ethics                                     |                 | mmlu                                         | MultipleChoice      |
-| 272 | mmlu/clinical_knowledge                                              | tasksource/mmlu                           | clinical_knowledge                                  |                 | mmlu                                         | MultipleChoice      |
-| 273 | mmlu/college_biology                                                 | tasksource/mmlu                           | college_biology                                     |                 | mmlu                                         | MultipleChoice      |
-| 274 | mmlu/college_chemistry                                               | tasksource/mmlu                           | college_chemistry                                   |                 | mmlu                                         | MultipleChoice      |
-| 275 | mmlu/high_school_biology                                             | tasksource/mmlu                           | high_school_biology                                 |                 | mmlu                                         | MultipleChoice      |
-| 276 | mmlu/high_school_chemistry                                           | tasksource/mmlu                           | high_school_chemistry                               |                 | mmlu                                         | MultipleChoice      |
-| 277 | mmlu/high_school_computer_science                                    | tasksource/mmlu                           | high_school_computer_science                        |                 | mmlu                                         | MultipleChoice      |
-| 278 | mmlu/professional_psychology                                         | tasksource/mmlu                           | professional_psychology                             |                 | mmlu                                         | MultipleChoice      |
-| 279 | mmlu/public_relations                                                | tasksource/mmlu                           | public_relations                                    |                 | mmlu                                         | MultipleChoice      |
-| 280 | mmlu/security_studies                                                | tasksource/mmlu                           | security_studies                                    |                 | mmlu                                         | MultipleChoice      |
-| 281 | mmlu/sociology                                                       | tasksource/mmlu                           | sociology                                           |                 | mmlu                                         | MultipleChoice      |
-| 282 | mmlu/us_foreign_policy                                               | tasksource/mmlu                           | us_foreign_policy                                   |                 | mmlu                                         | MultipleChoice      |
-| 283 | mmlu/virology                                                        | tasksource/mmlu                           | virology                                            |                 | mmlu                                         | MultipleChoice      |
-| 284 | mmlu/moral_disputes                                                  | tasksource/mmlu                           | moral_disputes                                      |                 | mmlu                                         | MultipleChoice      |
-| 285 | mmlu/moral_scenarios                                                 | tasksource/mmlu                           | moral_scenarios                                     |                 | mmlu                                         | MultipleChoice      |
-| 286 | mmlu/nutrition                                                       | tasksource/mmlu                           | nutrition                                           |                 | mmlu                                         | MultipleChoice      |
-| 287 | mmlu/philosophy                                                      | tasksource/mmlu                           | philosophy                                          |                 | mmlu                                         | MultipleChoice      |
-| 288 | mmlu/prehistory                                                      | tasksource/mmlu                           | prehistory                                          |                 | mmlu                                         | MultipleChoice      |
-| 289 | mmlu/professional_accounting                                         | tasksource/mmlu                           | professional_accounting                             |                 | mmlu                                         | MultipleChoice      |
-| 290 | mmlu/professional_law                                                | tasksource/mmlu                           | professional_law                                    |                 | mmlu                                         | MultipleChoice      |
-| 291 | mmlu/professional_medicine                                           | tasksource/mmlu                           | professional_medicine                               |                 | mmlu                                         | MultipleChoice      |
-| 292 | mmlu/college_medicine                                                | tasksource/mmlu                           | college_medicine                                    |                 | mmlu                                         | MultipleChoice      |
-| 293 | mmlu/college_physics                                                 | tasksource/mmlu                           | college_physics                                     |                 | mmlu                                         | MultipleChoice      |
-| 294 | mmlu/computer_security                                               | tasksource/mmlu                           | computer_security                                   |                 | mmlu                                         | MultipleChoice      |
-| 295 | mmlu/conceptual_physics                                              | tasksource/mmlu                           | conceptual_physics                                  |                 | mmlu                                         | MultipleChoice      |
-| 296 | mmlu/econometrics                                                    | tasksource/mmlu                           | econometrics                                        |                 | mmlu                                         | MultipleChoice      |
-| 297 | mmlu/electrical_engineering                                          | tasksource/mmlu                           | electrical_engineering                              |                 | mmlu                                         | MultipleChoice      |
-| 298 | mmlu/elementary_mathematics                                          | tasksource/mmlu                           | elementary_mathematics                              |                 | mmlu                                         | MultipleChoice      |
-| 299 | mmlu/formal_logic                                                    | tasksource/mmlu                           | formal_logic                                        |                 | mmlu                                         | MultipleChoice      |
-| 300 | mmlu/anatomy                                                         | tasksource/mmlu                           | anatomy                                             |                 | mmlu                                         | MultipleChoice      |
-| 301 | mmlu/astronomy                                                       | tasksource/mmlu                           | astronomy                                           |                 | mmlu                                         | MultipleChoice      |
-| 302 | mmlu/high_school_geography                                           | tasksource/mmlu                           | high_school_geography                               |                 | mmlu                                         | MultipleChoice      |
-| 303 | mmlu/high_school_government_and_politics                             | tasksource/mmlu                           | high_school_government_and_politics                 |                 | mmlu                                         | MultipleChoice      |
-| 304 | mmlu/global_facts                                                    | tasksource/mmlu                           | global_facts                                        |                 | mmlu                                         | MultipleChoice      |
-| 305 | mmlu/world_religions                                                 | tasksource/mmlu                           | world_religions                                     |                 | mmlu                                         | MultipleChoice      |
-| 306 | mmlu/international_law                                               | tasksource/mmlu                           | international_law                                   |                 | mmlu                                         | MultipleChoice      |
-| 307 | mmlu/high_school_us_history                                          | tasksource/mmlu                           | high_school_us_history                              |                 | mmlu                                         | MultipleChoice      |
-| 308 | mmlu/high_school_world_history                                       | tasksource/mmlu                           | high_school_world_history                           |                 | mmlu                                         | MultipleChoice      |
-| 309 | mmlu/human_aging                                                     | tasksource/mmlu                           | human_aging                                         |                 | mmlu                                         | MultipleChoice      |
-| 310 | mmlu/human_sexuality                                                 | tasksource/mmlu                           | human_sexuality                                     |                 | mmlu                                         | MultipleChoice      |
-| 311 | mmlu/high_school_macroeconomics                                      | tasksource/mmlu                           | high_school_macroeconomics                          |                 | mmlu                                         | MultipleChoice      |
-| 312 | mmlu/college_computer_science                                        | tasksource/mmlu                           | college_computer_science                            |                 | mmlu                                         | MultipleChoice      |
+| 256 | mmlu/sociology                                                       | tasksource/mmlu                           | sociology                                           |                 | mmlu                                         | MultipleChoice      |
+| 257 | mmlu/astronomy                                                       | tasksource/mmlu                           | astronomy                                           |                 | mmlu                                         | MultipleChoice      |
+| 258 | mmlu/anatomy                                                         | tasksource/mmlu                           | anatomy                                             |                 | mmlu                                         | MultipleChoice      |
+| 259 | mmlu/professional_law                                                | tasksource/mmlu                           | professional_law                                    |                 | mmlu                                         | MultipleChoice      |
+| 260 | mmlu/professional_accounting                                         | tasksource/mmlu                           | professional_accounting                             |                 | mmlu                                         | MultipleChoice      |
+| 261 | mmlu/prehistory                                                      | tasksource/mmlu                           | prehistory                                          |                 | mmlu                                         | MultipleChoice      |
+| 262 | mmlu/medical_genetics                                                | tasksource/mmlu                           | medical_genetics                                    |                 | mmlu                                         | MultipleChoice      |
+| 263 | mmlu/marketing                                                       | tasksource/mmlu                           | marketing                                           |                 | mmlu                                         | MultipleChoice      |
+| 264 | mmlu/management                                                      | tasksource/mmlu                           | management                                          |                 | mmlu                                         | MultipleChoice      |
+| 265 | mmlu/business_ethics                                                 | tasksource/mmlu                           | business_ethics                                     |                 | mmlu                                         | MultipleChoice      |
+| 266 | mmlu/security_studies                                                | tasksource/mmlu                           | security_studies                                    |                 | mmlu                                         | MultipleChoice      |
+| 267 | mmlu/public_relations                                                | tasksource/mmlu                           | public_relations                                    |                 | mmlu                                         | MultipleChoice      |
+| 268 | mmlu/professional_psychology                                         | tasksource/mmlu                           | professional_psychology                             |                 | mmlu                                         | MultipleChoice      |
+| 269 | mmlu/professional_medicine                                           | tasksource/mmlu                           | professional_medicine                               |                 | mmlu                                         | MultipleChoice      |
+| 270 | mmlu/human_aging                                                     | tasksource/mmlu                           | human_aging                                         |                 | mmlu                                         | MultipleChoice      |
+| 271 | mmlu/high_school_world_history                                       | tasksource/mmlu                           | high_school_world_history                           |                 | mmlu                                         | MultipleChoice      |
+| 272 | mmlu/philosophy                                                      | tasksource/mmlu                           | philosophy                                          |                 | mmlu                                         | MultipleChoice      |
+| 273 | mmlu/nutrition                                                       | tasksource/mmlu                           | nutrition                                           |                 | mmlu                                         | MultipleChoice      |
+| 274 | mmlu/global_facts                                                    | tasksource/mmlu                           | global_facts                                        |                 | mmlu                                         | MultipleChoice      |
+| 275 | mmlu/us_foreign_policy                                               | tasksource/mmlu                           | us_foreign_policy                                   |                 | mmlu                                         | MultipleChoice      |
+| 276 | mmlu/college_mathematics                                             | tasksource/mmlu                           | college_mathematics                                 |                 | mmlu                                         | MultipleChoice      |
+| 277 | mmlu/college_computer_science                                        | tasksource/mmlu                           | college_computer_science                            |                 | mmlu                                         | MultipleChoice      |
+| 278 | mmlu/college_chemistry                                               | tasksource/mmlu                           | college_chemistry                                   |                 | mmlu                                         | MultipleChoice      |
+| 279 | mmlu/high_school_european_history                                    | tasksource/mmlu                           | high_school_european_history                        |                 | mmlu                                         | MultipleChoice      |
+| 280 | mmlu/high_school_computer_science                                    | tasksource/mmlu                           | high_school_computer_science                        |                 | mmlu                                         | MultipleChoice      |
+| 281 | mmlu/high_school_chemistry                                           | tasksource/mmlu                           | high_school_chemistry                               |                 | mmlu                                         | MultipleChoice      |
+| 282 | mmlu/high_school_biology                                             | tasksource/mmlu                           | high_school_biology                                 |                 | mmlu                                         | MultipleChoice      |
+| 283 | mmlu/high_school_us_history                                          | tasksource/mmlu                           | high_school_us_history                              |                 | mmlu                                         | MultipleChoice      |
+| 284 | mmlu/abstract_algebra                                                | tasksource/mmlu                           | abstract_algebra                                    |                 | mmlu                                         | MultipleChoice      |
+| 285 | mmlu/econometrics                                                    | tasksource/mmlu                           | econometrics                                        |                 | mmlu                                         | MultipleChoice      |
+| 286 | mmlu/conceptual_physics                                              | tasksource/mmlu                           | conceptual_physics                                  |                 | mmlu                                         | MultipleChoice      |
+| 287 | mmlu/computer_security                                               | tasksource/mmlu                           | computer_security                                   |                 | mmlu                                         | MultipleChoice      |
+| 288 | mmlu/college_physics                                                 | tasksource/mmlu                           | college_physics                                     |                 | mmlu                                         | MultipleChoice      |
+| 289 | mmlu/college_medicine                                                | tasksource/mmlu                           | college_medicine                                    |                 | mmlu                                         | MultipleChoice      |
+| 290 | mmlu/college_biology                                                 | tasksource/mmlu                           | college_biology                                     |                 | mmlu                                         | MultipleChoice      |
+| 291 | mmlu/clinical_knowledge                                              | tasksource/mmlu                           | clinical_knowledge                                  |                 | mmlu                                         | MultipleChoice      |
+| 292 | mmlu/moral_disputes                                                  | tasksource/mmlu                           | moral_disputes                                      |                 | mmlu                                         | MultipleChoice      |
+| 293 | mmlu/formal_logic                                                    | tasksource/mmlu                           | formal_logic                                        |                 | mmlu                                         | MultipleChoice      |
+| 294 | mmlu/elementary_mathematics                                          | tasksource/mmlu                           | elementary_mathematics                              |                 | mmlu                                         | MultipleChoice      |
+| 295 | mmlu/electrical_engineering                                          | tasksource/mmlu                           | electrical_engineering                              |                 | mmlu                                         | MultipleChoice      |
+| 296 | mmlu/world_religions                                                 | tasksource/mmlu                           | world_religions                                     |                 | mmlu                                         | MultipleChoice      |
+| 297 | mmlu/virology                                                        | tasksource/mmlu                           | virology                                            |                 | mmlu                                         | MultipleChoice      |
+| 298 | mmlu/high_school_mathematics                                         | tasksource/mmlu                           | high_school_mathematics                             |                 | mmlu                                         | MultipleChoice      |
+| 299 | mmlu/high_school_microeconomics                                      | tasksource/mmlu                           | high_school_microeconomics                          |                 | mmlu                                         | MultipleChoice      |
+| 300 | mmlu/high_school_physics                                             | tasksource/mmlu                           | high_school_physics                                 |                 | mmlu                                         | MultipleChoice      |
+| 301 | mmlu/high_school_psychology                                          | tasksource/mmlu                           | high_school_psychology                              |                 | mmlu                                         | MultipleChoice      |
+| 302 | mmlu/high_school_statistics                                          | tasksource/mmlu                           | high_school_statistics                              |                 | mmlu                                         | MultipleChoice      |
+| 303 | mmlu/human_sexuality                                                 | tasksource/mmlu                           | human_sexuality                                     |                 | mmlu                                         | MultipleChoice      |
+| 304 | mmlu/international_law                                               | tasksource/mmlu                           | international_law                                   |                 | mmlu                                         | MultipleChoice      |
+| 305 | mmlu/miscellaneous                                                   | tasksource/mmlu                           | miscellaneous                                       |                 | mmlu                                         | MultipleChoice      |
+| 306 | mmlu/logical_fallacies                                               | tasksource/mmlu                           | logical_fallacies                                   |                 | mmlu                                         | MultipleChoice      |
+| 307 | mmlu/machine_learning                                                | tasksource/mmlu                           | machine_learning                                    |                 | mmlu                                         | MultipleChoice      |
+| 308 | mmlu/high_school_geography                                           | tasksource/mmlu                           | high_school_geography                               |                 | mmlu                                         | MultipleChoice      |
+| 309 | mmlu/high_school_government_and_politics                             | tasksource/mmlu                           | high_school_government_and_politics                 |                 | mmlu                                         | MultipleChoice      |
+| 310 | mmlu/high_school_macroeconomics                                      | tasksource/mmlu                           | high_school_macroeconomics                          |                 | mmlu                                         | MultipleChoice      |
+| 311 | mmlu/jurisprudence                                                   | tasksource/mmlu                           | jurisprudence                                       |                 | mmlu                                         | MultipleChoice      |
+| 312 | mmlu/moral_scenarios                                                 | tasksource/mmlu                           | moral_scenarios                                     |                 | mmlu                                         | MultipleChoice      |
 | 313 | winogrande/winogrande_xl                                             | winogrande                                | winogrande_xl                                       |                 | winogrande                                   | MultipleChoice      |
 | 314 | codah/codah                                                          | codah                                     | codah                                               |                 | codah                                        | MultipleChoice      |
-| 315 | ai2_arc/ARC-Easy/challenge                                           | ai2_arc                                   | ARC-Easy                                            | challenge       | ai2_arc__challenge                           | MultipleChoice      |
-| 316 | ai2_arc/ARC-Challenge/challenge                                      | ai2_arc                                   | ARC-Challenge                                       | challenge       | ai2_arc__challenge                           | MultipleChoice      |
+| 315 | ai2_arc/ARC-Challenge/challenge                                      | ai2_arc                                   | ARC-Challenge                                       | challenge       | ai2_arc__challenge                           | MultipleChoice      |
+| 316 | ai2_arc/ARC-Easy/challenge                                           | ai2_arc                                   | ARC-Easy                                            | challenge       | ai2_arc__challenge                           | MultipleChoice      |
 | 317 | definite_pronoun_resolution                                          | definite_pronoun_resolution               |                                                     |                 | definite_pronoun_resolution                  | MultipleChoice      |
 | 318 | swag/regular                                                         | swag                                      | regular                                             |                 | swag___regular                               | MultipleChoice      |
 | 319 | math_qa                                                              | math_qa                                   |                                                     |                 | math_qa                                      | MultipleChoice      |
@@ -333,46 +333,46 @@
 | 330 | rumoureval_2019/RumourEval2019                                       | strombergnlp/rumoureval_2019              | RumourEval2019                                      |                 | rumoureval_2019                              | Classification      |
 | 331 | ethos/binary                                                         | ethos                                     | binary                                              |                 | ethos___binary                               | Classification      |
 | 332 | ethos/multilabel                                                     | ethos                                     | multilabel                                          |                 | ethos___multilabel                           | Classification      |
-| 333 | tweet_eval/irony                                                     | tweet_eval                                | irony                                               |                 | tweet_eval                                   | Classification      |
-| 334 | tweet_eval/hate                                                      | tweet_eval                                | hate                                                |                 | tweet_eval                                   | Classification      |
-| 335 | tweet_eval/emotion                                                   | tweet_eval                                | emotion                                             |                 | tweet_eval                                   | Classification      |
-| 336 | tweet_eval/emoji                                                     | tweet_eval                                | emoji                                               |                 | tweet_eval                                   | Classification      |
-| 337 | tweet_eval/offensive                                                 | tweet_eval                                | offensive                                           |                 | tweet_eval                                   | Classification      |
-| 338 | tweet_eval/sentiment                                                 | tweet_eval                                | sentiment                                           |                 | tweet_eval                                   | Classification      |
+| 333 | tweet_eval/sentiment                                                 | tweet_eval                                | sentiment                                           |                 | tweet_eval                                   | Classification      |
+| 334 | tweet_eval/irony                                                     | tweet_eval                                | irony                                               |                 | tweet_eval                                   | Classification      |
+| 335 | tweet_eval/offensive                                                 | tweet_eval                                | offensive                                           |                 | tweet_eval                                   | Classification      |
+| 336 | tweet_eval/hate                                                      | tweet_eval                                | hate                                                |                 | tweet_eval                                   | Classification      |
+| 337 | tweet_eval/emotion                                                   | tweet_eval                                | emotion                                             |                 | tweet_eval                                   | Classification      |
+| 338 | tweet_eval/emoji                                                     | tweet_eval                                | emoji                                               |                 | tweet_eval                                   | Classification      |
 | 339 | tweet_eval/stance_abortion                                           | tweet_eval                                | stance_abortion                                     |                 | tweet_eval_abortion                          | Classification      |
 | 340 | tweet_eval/stance_atheism                                            | tweet_eval                                | stance_atheism                                      |                 | tweet_eval_atheism                           | Classification      |
 | 341 | tweet_eval/stance_climate                                            | tweet_eval                                | stance_climate                                      |                 | tweet_eval_climate                           | Classification      |
 | 342 | tweet_eval/stance_feminist                                           | tweet_eval                                | stance_feminist                                     |                 | tweet_eval_feminist                          | Classification      |
 | 343 | tweet_eval/stance_hillary                                            | tweet_eval                                | stance_hillary                                      |                 | tweet_eval_hillary                           | Classification      |
 | 344 | discovery/discovery                                                  | discovery                                 | discovery                                           |                 | discovery                                    | Classification      |
-| 345 | pragmeval/emobank-valence                                            | pragmeval                                 | emobank-valence                                     |                 | pragmeval_1                                  | Classification      |
-| 346 | pragmeval/squinky-informativeness                                    | pragmeval                                 | squinky-informativeness                             |                 | pragmeval_1                                  | Classification      |
-| 347 | pragmeval/squinky-implicature                                        | pragmeval                                 | squinky-implicature                                 |                 | pragmeval_1                                  | Classification      |
-| 348 | pragmeval/squinky-formality                                          | pragmeval                                 | squinky-formality                                   |                 | pragmeval_1                                  | Classification      |
-| 349 | pragmeval/emobank-arousal                                            | pragmeval                                 | emobank-arousal                                     |                 | pragmeval_1                                  | Classification      |
-| 350 | pragmeval/emobank-dominance                                          | pragmeval                                 | emobank-dominance                                   |                 | pragmeval_1                                  | Classification      |
-| 351 | pragmeval/switchboard                                                | pragmeval                                 | switchboard                                         |                 | pragmeval_1                                  | Classification      |
+| 345 | pragmeval/squinky-informativeness                                    | pragmeval                                 | squinky-informativeness                             |                 | pragmeval_1                                  | Classification      |
+| 346 | pragmeval/emobank-arousal                                            | pragmeval                                 | emobank-arousal                                     |                 | pragmeval_1                                  | Classification      |
+| 347 | pragmeval/switchboard                                                | pragmeval                                 | switchboard                                         |                 | pragmeval_1                                  | Classification      |
+| 348 | pragmeval/squinky-implicature                                        | pragmeval                                 | squinky-implicature                                 |                 | pragmeval_1                                  | Classification      |
+| 349 | pragmeval/emobank-valence                                            | pragmeval                                 | emobank-valence                                     |                 | pragmeval_1                                  | Classification      |
+| 350 | pragmeval/mrda                                                       | pragmeval                                 | mrda                                                |                 | pragmeval_1                                  | Classification      |
+| 351 | pragmeval/squinky-formality                                          | pragmeval                                 | squinky-formality                                   |                 | pragmeval_1                                  | Classification      |
 | 352 | pragmeval/verifiability                                              | pragmeval                                 | verifiability                                       |                 | pragmeval_1                                  | Classification      |
-| 353 | pragmeval/mrda                                                       | pragmeval                                 | mrda                                                |                 | pragmeval_1                                  | Classification      |
-| 354 | pragmeval/persuasiveness-eloquence                                   | pragmeval                                 | persuasiveness-eloquence                            |                 | pragmeval_2                                  | Classification      |
-| 355 | pragmeval/pdtb                                                       | pragmeval                                 | pdtb                                                |                 | pragmeval_2                                  | Classification      |
-| 356 | pragmeval/persuasiveness-specificity                                 | pragmeval                                 | persuasiveness-specificity                          |                 | pragmeval_2                                  | Classification      |
-| 357 | pragmeval/persuasiveness-relevance                                   | pragmeval                                 | persuasiveness-relevance                            |                 | pragmeval_2                                  | Classification      |
-| 358 | pragmeval/persuasiveness-claimtype                                   | pragmeval                                 | persuasiveness-claimtype                            |                 | pragmeval_2                                  | Classification      |
-| 359 | pragmeval/emergent                                                   | pragmeval                                 | emergent                                            |                 | pragmeval_2                                  | Classification      |
-| 360 | pragmeval/gum                                                        | pragmeval                                 | gum                                                 |                 | pragmeval_2                                  | Classification      |
-| 361 | pragmeval/stac                                                       | pragmeval                                 | stac                                                |                 | pragmeval_2                                  | Classification      |
-| 362 | pragmeval/sarcasm                                                    | pragmeval                                 | sarcasm                                             |                 | pragmeval_2                                  | Classification      |
-| 363 | pragmeval/persuasiveness-strength                                    | pragmeval                                 | persuasiveness-strength                             |                 | pragmeval_2                                  | Classification      |
-| 364 | pragmeval/persuasiveness-premisetype                                 | pragmeval                                 | persuasiveness-premisetype                          |                 | pragmeval_2                                  | Classification      |
-| 365 | silicone/iemocap                                                     | silicone                                  | iemocap                                             |                 | silicone                                     | Classification      |
-| 366 | silicone/meld_e                                                      | silicone                                  | meld_e                                              |                 | silicone                                     | Classification      |
-| 367 | silicone/oasis                                                       | silicone                                  | oasis                                               |                 | silicone                                     | Classification      |
-| 368 | silicone/sem                                                         | silicone                                  | sem                                                 |                 | silicone                                     | Classification      |
-| 369 | silicone/dyda_da                                                     | silicone                                  | dyda_da                                             |                 | silicone                                     | Classification      |
-| 370 | silicone/meld_s                                                      | silicone                                  | meld_s                                              |                 | silicone                                     | Classification      |
-| 371 | silicone/maptask                                                     | silicone                                  | maptask                                             |                 | silicone                                     | Classification      |
-| 372 | silicone/dyda_e                                                      | silicone                                  | dyda_e                                              |                 | silicone                                     | Classification      |
+| 353 | pragmeval/emobank-dominance                                          | pragmeval                                 | emobank-dominance                                   |                 | pragmeval_1                                  | Classification      |
+| 354 | pragmeval/persuasiveness-specificity                                 | pragmeval                                 | persuasiveness-specificity                          |                 | pragmeval_2                                  | Classification      |
+| 355 | pragmeval/persuasiveness-strength                                    | pragmeval                                 | persuasiveness-strength                             |                 | pragmeval_2                                  | Classification      |
+| 356 | pragmeval/persuasiveness-claimtype                                   | pragmeval                                 | persuasiveness-claimtype                            |                 | pragmeval_2                                  | Classification      |
+| 357 | pragmeval/pdtb                                                       | pragmeval                                 | pdtb                                                |                 | pragmeval_2                                  | Classification      |
+| 358 | pragmeval/sarcasm                                                    | pragmeval                                 | sarcasm                                             |                 | pragmeval_2                                  | Classification      |
+| 359 | pragmeval/stac                                                       | pragmeval                                 | stac                                                |                 | pragmeval_2                                  | Classification      |
+| 360 | pragmeval/persuasiveness-premisetype                                 | pragmeval                                 | persuasiveness-premisetype                          |                 | pragmeval_2                                  | Classification      |
+| 361 | pragmeval/persuasiveness-eloquence                                   | pragmeval                                 | persuasiveness-eloquence                            |                 | pragmeval_2                                  | Classification      |
+| 362 | pragmeval/gum                                                        | pragmeval                                 | gum                                                 |                 | pragmeval_2                                  | Classification      |
+| 363 | pragmeval/emergent                                                   | pragmeval                                 | emergent                                            |                 | pragmeval_2                                  | Classification      |
+| 364 | pragmeval/persuasiveness-relevance                                   | pragmeval                                 | persuasiveness-relevance                            |                 | pragmeval_2                                  | Classification      |
+| 365 | silicone/dyda_da                                                     | silicone                                  | dyda_da                                             |                 | silicone                                     | Classification      |
+| 366 | silicone/dyda_e                                                      | silicone                                  | dyda_e                                              |                 | silicone                                     | Classification      |
+| 367 | silicone/maptask                                                     | silicone                                  | maptask                                             |                 | silicone                                     | Classification      |
+| 368 | silicone/meld_e                                                      | silicone                                  | meld_e                                              |                 | silicone                                     | Classification      |
+| 369 | silicone/meld_s                                                      | silicone                                  | meld_s                                              |                 | silicone                                     | Classification      |
+| 370 | silicone/sem                                                         | silicone                                  | sem                                                 |                 | silicone                                     | Classification      |
+| 371 | silicone/oasis                                                       | silicone                                  | oasis                                               |                 | silicone                                     | Classification      |
+| 372 | silicone/iemocap                                                     | silicone                                  | iemocap                                             |                 | silicone                                     | Classification      |
 | 373 | lex_glue/eurlex                                                      | lex_glue                                  | eurlex                                              |                 | lex_glue___eurlex                            | Classification      |
 | 374 | lex_glue/scotus                                                      | lex_glue                                  | scotus                                              |                 | lex_glue___scotus                            | Classification      |
 | 375 | lex_glue/ledgar                                                      | lex_glue                                  | ledgar                                              |                 | lex_glue___ledgar                            | Classification      |
@@ -403,28 +403,28 @@
 | 400 | scicite                                                              | allenai/scicite                           |                                                     |                 | scicite                                      | Classification      |
 | 401 | liar                                                                 | liar                                      |                                                     |                 | liar                                         | Classification      |
 | 402 | lexical_relation_classification/ROOT09                               | relbert/lexical_relation_classification   | ROOT09                                              |                 | relbert_lexical_relation_classification      | Classification      |
-| 403 | lexical_relation_classification/EVALution                            | relbert/lexical_relation_classification   | EVALution                                           |                 | relbert_lexical_relation_classification      | Classification      |
+| 403 | lexical_relation_classification/BLESS                                | relbert/lexical_relation_classification   | BLESS                                               |                 | relbert_lexical_relation_classification      | Classification      |
 | 404 | lexical_relation_classification/CogALexV                             | relbert/lexical_relation_classification   | CogALexV                                            |                 | relbert_lexical_relation_classification      | Classification      |
-| 405 | lexical_relation_classification/BLESS                                | relbert/lexical_relation_classification   | BLESS                                               |                 | relbert_lexical_relation_classification      | Classification      |
+| 405 | lexical_relation_classification/EVALution                            | relbert/lexical_relation_classification   | EVALution                                           |                 | relbert_lexical_relation_classification      | Classification      |
 | 406 | lexical_relation_classification/K&H+N                                | relbert/lexical_relation_classification   | K&H+N                                               |                 | relbert_lexical_relation_classification      | Classification      |
-| 407 | linguisticprobing/odd_man_out                                        | metaeval/linguisticprobing                | odd_man_out                                         |                 | metaeval_linguisticprobing                   | Classification      |
-| 408 | linguisticprobing/bigram_shift                                       | metaeval/linguisticprobing                | bigram_shift                                        |                 | metaeval_linguisticprobing                   | Classification      |
-| 409 | linguisticprobing/tree_depth                                         | metaeval/linguisticprobing                | tree_depth                                          |                 | metaeval_linguisticprobing                   | Classification      |
-| 410 | linguisticprobing/past_present                                       | metaeval/linguisticprobing                | past_present                                        |                 | metaeval_linguisticprobing                   | Classification      |
-| 411 | linguisticprobing/sentence_length                                    | metaeval/linguisticprobing                | sentence_length                                     |                 | metaeval_linguisticprobing                   | Classification      |
-| 412 | linguisticprobing/top_constituents                                   | metaeval/linguisticprobing                | top_constituents                                    |                 | metaeval_linguisticprobing                   | Classification      |
-| 413 | linguisticprobing/subj_number                                        | metaeval/linguisticprobing                | subj_number                                         |                 | metaeval_linguisticprobing                   | Classification      |
-| 414 | linguisticprobing/obj_number                                         | metaeval/linguisticprobing                | obj_number                                          |                 | metaeval_linguisticprobing                   | Classification      |
-| 415 | linguisticprobing/coordination_inversion                             | metaeval/linguisticprobing                | coordination_inversion                              |                 | metaeval_linguisticprobing                   | Classification      |
-| 416 | crowdflower/airline-sentiment                                        | metaeval/crowdflower                      | airline-sentiment                                   |                 | metaeval_crowdflower                         | Classification      |
-| 417 | crowdflower/economic-news                                            | metaeval/crowdflower                      | economic-news                                       |                 | metaeval_crowdflower                         | Classification      |
-| 418 | crowdflower/corporate-messaging                                      | metaeval/crowdflower                      | corporate-messaging                                 |                 | metaeval_crowdflower                         | Classification      |
+| 407 | linguisticprobing/coordination_inversion                             | metaeval/linguisticprobing                | coordination_inversion                              |                 | metaeval_linguisticprobing                   | Classification      |
+| 408 | linguisticprobing/obj_number                                         | metaeval/linguisticprobing                | obj_number                                          |                 | metaeval_linguisticprobing                   | Classification      |
+| 409 | linguisticprobing/past_present                                       | metaeval/linguisticprobing                | past_present                                        |                 | metaeval_linguisticprobing                   | Classification      |
+| 410 | linguisticprobing/sentence_length                                    | metaeval/linguisticprobing                | sentence_length                                     |                 | metaeval_linguisticprobing                   | Classification      |
+| 411 | linguisticprobing/subj_number                                        | metaeval/linguisticprobing                | subj_number                                         |                 | metaeval_linguisticprobing                   | Classification      |
+| 412 | linguisticprobing/odd_man_out                                        | metaeval/linguisticprobing                | odd_man_out                                         |                 | metaeval_linguisticprobing                   | Classification      |
+| 413 | linguisticprobing/tree_depth                                         | metaeval/linguisticprobing                | tree_depth                                          |                 | metaeval_linguisticprobing                   | Classification      |
+| 414 | linguisticprobing/top_constituents                                   | metaeval/linguisticprobing                | top_constituents                                    |                 | metaeval_linguisticprobing                   | Classification      |
+| 415 | linguisticprobing/bigram_shift                                       | metaeval/linguisticprobing                | bigram_shift                                        |                 | metaeval_linguisticprobing                   | Classification      |
+| 416 | crowdflower/political-media-message                                  | metaeval/crowdflower                      | political-media-message                             |                 | metaeval_crowdflower                         | Classification      |
+| 417 | crowdflower/political-media-audience                                 | metaeval/crowdflower                      | political-media-audience                            |                 | metaeval_crowdflower                         | Classification      |
+| 418 | crowdflower/economic-news                                            | metaeval/crowdflower                      | economic-news                                       |                 | metaeval_crowdflower                         | Classification      |
 | 419 | crowdflower/text_emotion                                             | metaeval/crowdflower                      | text_emotion                                        |                 | metaeval_crowdflower                         | Classification      |
-| 420 | crowdflower/political-media-message                                  | metaeval/crowdflower                      | political-media-message                             |                 | metaeval_crowdflower                         | Classification      |
-| 421 | crowdflower/political-media-bias                                     | metaeval/crowdflower                      | political-media-bias                                |                 | metaeval_crowdflower                         | Classification      |
-| 422 | crowdflower/political-media-audience                                 | metaeval/crowdflower                      | political-media-audience                            |                 | metaeval_crowdflower                         | Classification      |
-| 423 | crowdflower/sentiment_nuclear_power                                  | metaeval/crowdflower                      | sentiment_nuclear_power                             |                 | metaeval_crowdflower                         | Classification      |
-| 424 | crowdflower/tweet_global_warming                                     | metaeval/crowdflower                      | tweet_global_warming                                |                 | metaeval_crowdflower                         | Classification      |
+| 420 | crowdflower/political-media-bias                                     | metaeval/crowdflower                      | political-media-bias                                |                 | metaeval_crowdflower                         | Classification      |
+| 421 | crowdflower/airline-sentiment                                        | metaeval/crowdflower                      | airline-sentiment                                   |                 | metaeval_crowdflower                         | Classification      |
+| 422 | crowdflower/tweet_global_warming                                     | metaeval/crowdflower                      | tweet_global_warming                                |                 | metaeval_crowdflower                         | Classification      |
+| 423 | crowdflower/corporate-messaging                                      | metaeval/crowdflower                      | corporate-messaging                                 |                 | metaeval_crowdflower                         | Classification      |
+| 424 | crowdflower/sentiment_nuclear_power                                  | metaeval/crowdflower                      | sentiment_nuclear_power                             |                 | metaeval_crowdflower                         | Classification      |
 | 425 | ethics/commonsense                                                   | metaeval/ethics                           | commonsense                                         |                 | metaeval_ethics___commonsense                | Classification      |
 | 426 | ethics/deontology                                                    | metaeval/ethics                           | deontology                                          |                 | metaeval_ethics___deontology                 | Classification      |
 | 427 | ethics/justice                                                       | metaeval/ethics                           | justice                                             |                 | metaeval_ethics___justice                    | Classification      |
@@ -448,171 +448,174 @@
 | 445 | ade_corpus_v2/Ade_corpus_v2_classification                           | ade_corpus_v2                             | Ade_corpus_v2_classification                        |                 | ade_corpus_v2___Ade_corpus_v2_classification | Classification      |
 | 446 | discosense                                                           | prajjwal1/discosense                      |                                                     |                 | discosense                                   | MultipleChoice      |
 | 447 | circa                                                                | circa                                     |                                                     |                 | circa                                        | Classification      |
-| 448 | EffectiveFeedbackStudentWriting                                      | YaHi/EffectiveFeedbackStudentWriting      |                                                     |                 | effective_feedback_student_writing           | Classification      |
-| 449 | phrase_similarity                                                    | PiC/phrase_similarity                     |                                                     |                 | phrase_similarity                            | Classification      |
-| 450 | scientific-exaggeration-detection                                    | copenlu/scientific-exaggeration-detection |                                                     |                 | exaggeration_detection                       | Classification      |
-| 451 | quarel                                                               | quarel                                    |                                                     |                 | quarel                                       | Classification      |
-| 452 | fever-evidence-related/mwong--fever-related                          | mwong/fever-evidence-related              | mwong--fever-related                                |                 | mwong_fever_evidence_related                 | Classification      |
-| 453 | numer_sense                                                          | numer_sense                               |                                                     |                 | numer_sense                                  | Classification      |
-| 454 | dynasent/dynabench.dynasent.r1.all/r1                                | dynabench/dynasent                        | dynabench.dynasent.r1.all                           | r1              | dynasent__r1                                 | Classification      |
-| 455 | dynasent/dynabench.dynasent.r2.all/r2                                | dynabench/dynasent                        | dynabench.dynasent.r2.all                           | r2              | dynasent__r2                                 | Classification      |
-| 456 | Sarcasm_News_Headline                                                | raquiba/Sarcasm_News_Headline             |                                                     |                 | sarcasm_news                                 | Classification      |
-| 457 | sem_eval_2010_task_8                                                 | sem_eval_2010_task_8                      |                                                     |                 | sem_eval_2010_task_8                         | Classification      |
-| 458 | auditor_review/demo-org--auditor_review                              | demo-org/auditor_review                   | demo-org--auditor_review                            |                 | demo_org_auditor_review                      | Classification      |
-| 459 | medmcqa                                                              | medmcqa                                   |                                                     |                 | medmcqa                                      | MultipleChoice      |
-| 460 | Dynasent_Disagreement                                                | RuyuanWan/Dynasent_Disagreement           |                                                     |                 | dynasent_disagreement                        | Classification      |
-| 461 | Politeness_Disagreement                                              | RuyuanWan/Politeness_Disagreement         |                                                     |                 | politeness_disagreement                      | Classification      |
-| 462 | SBIC_Disagreement                                                    | RuyuanWan/SBIC_Disagreement               |                                                     |                 | sbic_disagreement                            | Classification      |
-| 463 | SChem_Disagreement                                                   | RuyuanWan/SChem_Disagreement              |                                                     |                 | schem_disagreement                           | Classification      |
-| 464 | Dilemmas_Disagreement                                                | RuyuanWan/Dilemmas_Disagreement           |                                                     |                 | dilemmas_disagreement                        | Classification      |
-| 465 | logiqa                                                               | lucasmccabe/logiqa                        |                                                     |                 | logiqa                                       | MultipleChoice      |
-| 466 | wiki_qa                                                              | wiki_qa                                   |                                                     |                 | wiki_qa                                      | Classification      |
-| 467 | cycic_classification                                                 | metaeval/cycic_classification             |                                                     |                 | cycic_classification                         | Classification      |
-| 468 | cycic_multiplechoice                                                 | metaeval/cycic_multiplechoice             |                                                     |                 | cycic_mc                                     | MultipleChoice      |
-| 469 | sts-companion                                                        | metaeval/sts-companion                    |                                                     |                 | sts_companion                                | Classification      |
-| 470 | commonsense_qa_2.0                                                   | metaeval/commonsense_qa_2.0               |                                                     |                 | commonsense_qa_2                             | Classification      |
-| 471 | lingnli                                                              | metaeval/lingnli                          |                                                     |                 | ling_nli                                     | Classification      |
-| 472 | monotonicity-entailment                                              | metaeval/monotonicity-entailment          |                                                     |                 | monotonicity_entailment                      | Classification      |
-| 473 | arct                                                                 | metaeval/arct                             |                                                     |                 | arct                                         | MultipleChoice      |
-| 474 | scinli                                                               | metaeval/scinli                           |                                                     |                 | scinli                                       | Classification      |
-| 475 | naturallogic                                                         | metaeval/naturallogic                     |                                                     |                 | naturallogic                                 | Classification      |
-| 476 | onestop_qa                                                           | onestop_qa                                |                                                     |                 | onestop_qa                                   | MultipleChoice      |
-| 477 | moral_stories/full                                                   | demelin/moral_stories                     | full                                                |                 | moral_stories                                | MultipleChoice      |
-| 478 | prost                                                                | corypaik/prost                            |                                                     |                 | prost                                        | MultipleChoice      |
-| 479 | dynahate                                                             | aps/dynahate                              |                                                     |                 | dyna_hate                                    | Classification      |
-| 480 | syntactic-augmentation-nli                                           | metaeval/syntactic-augmentation-nli       |                                                     |                 | syntactic_augmentation_nli                   | Classification      |
-| 481 | autotnli                                                             | metaeval/autotnli                         |                                                     |                 | autotnli                                     | Classification      |
-| 482 | CONDAQA                                                              | lasha-nlp/CONDAQA                         |                                                     |                 | conqada                                      | Classification      |
-| 483 | webgpt_comparisons                                                   | openai/webgpt_comparisons                 |                                                     |                 | webgbpt_comparisons                          | MultipleChoice      |
-| 484 | synthetic-instruct-gptj-pairwise                                     | Dahoas/synthetic-instruct-gptj-pairwise   |                                                     |                 | synthetic_instruct                           | MultipleChoice      |
-| 485 | scruples                                                             | metaeval/scruples                         |                                                     |                 | scruples                                     | Classification      |
-| 486 | wouldyourather                                                       | metaeval/wouldyourather                   |                                                     |                 | wouldyourather                               | MultipleChoice      |
-| 487 | attempto-nli                                                         | sileod/attempto-nli                       |                                                     |                 | attempto_nli                                 | Classification      |
+| 448 | phrase_similarity                                                    | PiC/phrase_similarity                     |                                                     |                 | phrase_similarity                            | Classification      |
+| 449 | scientific-exaggeration-detection                                    | copenlu/scientific-exaggeration-detection |                                                     |                 | exaggeration_detection                       | Classification      |
+| 450 | quarel                                                               | quarel                                    |                                                     |                 | quarel                                       | Classification      |
+| 451 | fever-evidence-related/mwong--fever-related                          | mwong/fever-evidence-related              | mwong--fever-related                                |                 | mwong_fever_evidence_related                 | Classification      |
+| 452 | numer_sense                                                          | numer_sense                               |                                                     |                 | numer_sense                                  | Classification      |
+| 453 | dynasent/dynabench.dynasent.r1.all/r1                                | dynabench/dynasent                        | dynabench.dynasent.r1.all                           | r1              | dynasent__r1                                 | Classification      |
+| 454 | dynasent/dynabench.dynasent.r2.all/r2                                | dynabench/dynasent                        | dynabench.dynasent.r2.all                           | r2              | dynasent__r2                                 | Classification      |
+| 455 | Sarcasm_News_Headline                                                | raquiba/Sarcasm_News_Headline             |                                                     |                 | sarcasm_news                                 | Classification      |
+| 456 | sem_eval_2010_task_8                                                 | sem_eval_2010_task_8                      |                                                     |                 | sem_eval_2010_task_8                         | Classification      |
+| 457 | auditor_review/demo-org--auditor_review                              | demo-org/auditor_review                   | demo-org--auditor_review                            |                 | demo_org_auditor_review                      | Classification      |
+| 458 | medmcqa                                                              | medmcqa                                   |                                                     |                 | medmcqa                                      | MultipleChoice      |
+| 459 | Dynasent_Disagreement                                                | RuyuanWan/Dynasent_Disagreement           |                                                     |                 | dynasent_disagreement                        | Classification      |
+| 460 | Politeness_Disagreement                                              | RuyuanWan/Politeness_Disagreement         |                                                     |                 | politeness_disagreement                      | Classification      |
+| 461 | SBIC_Disagreement                                                    | RuyuanWan/SBIC_Disagreement               |                                                     |                 | sbic_disagreement                            | Classification      |
+| 462 | SChem_Disagreement                                                   | RuyuanWan/SChem_Disagreement              |                                                     |                 | schem_disagreement                           | Classification      |
+| 463 | Dilemmas_Disagreement                                                | RuyuanWan/Dilemmas_Disagreement           |                                                     |                 | dilemmas_disagreement                        | Classification      |
+| 464 | logiqa                                                               | lucasmccabe/logiqa                        |                                                     |                 | logiqa                                       | MultipleChoice      |
+| 465 | wiki_qa                                                              | wiki_qa                                   |                                                     |                 | wiki_qa                                      | Classification      |
+| 466 | cycic_classification                                                 | metaeval/cycic_classification             |                                                     |                 | cycic_classification                         | Classification      |
+| 467 | cycic_multiplechoice                                                 | metaeval/cycic_multiplechoice             |                                                     |                 | cycic_mc                                     | MultipleChoice      |
+| 468 | sts-companion                                                        | metaeval/sts-companion                    |                                                     |                 | sts_companion                                | Classification      |
+| 469 | commonsense_qa_2.0                                                   | metaeval/commonsense_qa_2.0               |                                                     |                 | commonsense_qa_2                             | Classification      |
+| 470 | lingnli                                                              | metaeval/lingnli                          |                                                     |                 | ling_nli                                     | Classification      |
+| 471 | monotonicity-entailment                                              | metaeval/monotonicity-entailment          |                                                     |                 | monotonicity_entailment                      | Classification      |
+| 472 | arct                                                                 | metaeval/arct                             |                                                     |                 | arct                                         | MultipleChoice      |
+| 473 | scinli                                                               | metaeval/scinli                           |                                                     |                 | scinli                                       | Classification      |
+| 474 | naturallogic                                                         | metaeval/naturallogic                     |                                                     |                 | naturallogic                                 | Classification      |
+| 475 | onestop_qa                                                           | onestop_qa                                |                                                     |                 | onestop_qa                                   | MultipleChoice      |
+| 476 | moral_stories/full                                                   | demelin/moral_stories                     | full                                                |                 | moral_stories                                | MultipleChoice      |
+| 477 | prost                                                                | corypaik/prost                            |                                                     |                 | prost                                        | MultipleChoice      |
+| 478 | dynahate                                                             | aps/dynahate                              |                                                     |                 | dyna_hate                                    | Classification      |
+| 479 | syntactic-augmentation-nli                                           | metaeval/syntactic-augmentation-nli       |                                                     |                 | syntactic_augmentation_nli                   | Classification      |
+| 480 | autotnli                                                             | metaeval/autotnli                         |                                                     |                 | autotnli                                     | Classification      |
+| 481 | CONDAQA                                                              | lasha-nlp/CONDAQA                         |                                                     |                 | conqada                                      | Classification      |
+| 482 | webgpt_comparisons                                                   | openai/webgpt_comparisons                 |                                                     |                 | webgbpt_comparisons                          | MultipleChoice      |
+| 483 | synthetic-instruct-gptj-pairwise                                     | Dahoas/synthetic-instruct-gptj-pairwise   |                                                     |                 | synthetic_instruct                           | MultipleChoice      |
+| 484 | scruples                                                             | metaeval/scruples                         |                                                     |                 | scruples                                     | Classification      |
+| 485 | wouldyourather                                                       | metaeval/wouldyourather                   |                                                     |                 | wouldyourather                               | MultipleChoice      |
+| 486 | attempto-nli                                                         | sileod/attempto-nli                       |                                                     |                 | attempto_nli                                 | Classification      |
+| 487 | defeasible-nli/snli                                                  | metaeval/defeasible-nli                   | snli                                                |                 | defeasible_nli                               | Classification      |
 | 488 | defeasible-nli/atomic                                                | metaeval/defeasible-nli                   | atomic                                              |                 | defeasible_nli                               | Classification      |
-| 489 | defeasible-nli/snli                                                  | metaeval/defeasible-nli                   | snli                                                |                 | defeasible_nli                               | Classification      |
-| 490 | help-nli                                                             | metaeval/help-nli                         |                                                     |                 | help_nli                                     | Classification      |
-| 491 | nli-veridicality-transitivity                                        | metaeval/nli-veridicality-transitivity    |                                                     |                 | nli_veridicality_transitivity                | Classification      |
-| 492 | natural-language-satisfiability                                      | metaeval/natural-language-satisfiability  |                                                     |                 | nl_satisfiability                            | Classification      |
-| 493 | lonli                                                                | metaeval/lonli                            |                                                     |                 | lonli                                        | Classification      |
-| 494 | dadc-limit-nli                                                       | metaeval/dadc-limit-nli                   |                                                     |                 | dadc_limit                                   | Classification      |
-| 495 | FLUTE                                                                | ColumbiaNLP/FLUTE                         |                                                     |                 | flute                                        | Classification      |
-| 496 | strategy-qa                                                          | metaeval/strategy-qa                      |                                                     |                 | strategy_qa                                  | Classification      |
-| 497 | summarize_from_feedback/comparisons                                  | openai/summarize_from_feedback            | comparisons                                         |                 | summarize_from_feedback                      | MultipleChoice      |
-| 498 | folio                                                                | metaeval/folio                            |                                                     |                 | folio                                        | Classification      |
-| 499 | tomi-nli                                                             | metaeval/tomi-nli                         |                                                     |                 | tomi_nli                                     | Classification      |
-| 500 | avicenna                                                             | metaeval/avicenna                         |                                                     |                 | avicenna                                     | Classification      |
-| 501 | SHP                                                                  | stanfordnlp/SHP                           |                                                     |                 | shp                                          | MultipleChoice      |
-| 502 | MedQA-USMLE-4-options-hf                                             | GBaker/MedQA-USMLE-4-options-hf           |                                                     |                 | medqa_usmle                                  | MultipleChoice      |
-| 503 | wikimedqa/medwiki                                                    | sileod/wikimedqa                          | medwiki                                             |                 | wikimedqa                                    | MultipleChoice      |
-| 504 | cicero                                                               | declare-lab/cicero                        |                                                     |                 | cicero                                       | MultipleChoice      |
-| 505 | CREAK                                                                | amydeng2000/CREAK                         |                                                     |                 | creak                                        | Classification      |
-| 506 | mutual                                                               | metaeval/mutual                           |                                                     |                 | mutual                                       | MultipleChoice      |
-| 507 | NeQA                                                                 | inverse-scaling/NeQA                      |                                                     |                 | neqa                                         | MultipleChoice      |
-| 508 | quote-repetition                                                     | inverse-scaling/quote-repetition          |                                                     |                 | quote_repetition                             | MultipleChoice      |
-| 509 | redefine-math                                                        | inverse-scaling/redefine-math             |                                                     |                 | redefine_math                                | MultipleChoice      |
-| 510 | puzzte                                                               | metaeval/puzzte                           |                                                     |                 | puzzte                                       | Classification      |
-| 511 | implicatures                                                         | metaeval/implicatures                     |                                                     |                 | implicatures                                 | MultipleChoice      |
+| 489 | help-nli                                                             | metaeval/help-nli                         |                                                     |                 | help_nli                                     | Classification      |
+| 490 | nli-veridicality-transitivity                                        | metaeval/nli-veridicality-transitivity    |                                                     |                 | nli_veridicality_transitivity                | Classification      |
+| 491 | natural-language-satisfiability                                      | metaeval/natural-language-satisfiability  |                                                     |                 | nl_satisfiability                            | Classification      |
+| 492 | lonli                                                                | metaeval/lonli                            |                                                     |                 | lonli                                        | Classification      |
+| 493 | dadc-limit-nli                                                       | metaeval/dadc-limit-nli                   |                                                     |                 | dadc_limit                                   | Classification      |
+| 494 | FLUTE                                                                | ColumbiaNLP/FLUTE                         |                                                     |                 | flute                                        | Classification      |
+| 495 | strategy-qa                                                          | metaeval/strategy-qa                      |                                                     |                 | strategy_qa                                  | Classification      |
+| 496 | summarize_from_feedback/comparisons                                  | openai/summarize_from_feedback            | comparisons                                         |                 | summarize_from_feedback                      | MultipleChoice      |
+| 497 | folio                                                                | metaeval/folio                            |                                                     |                 | folio                                        | Classification      |
+| 498 | tomi-nli                                                             | metaeval/tomi-nli                         |                                                     |                 | tomi_nli                                     | Classification      |
+| 499 | avicenna                                                             | metaeval/avicenna                         |                                                     |                 | avicenna                                     | Classification      |
+| 500 | SHP                                                                  | stanfordnlp/SHP                           |                                                     |                 | shp                                          | MultipleChoice      |
+| 501 | MedQA-USMLE-4-options-hf                                             | GBaker/MedQA-USMLE-4-options-hf           |                                                     |                 | medqa_usmle                                  | MultipleChoice      |
+| 502 | wikimedqa/medwiki                                                    | sileod/wikimedqa                          | medwiki                                             |                 | wikimedqa                                    | MultipleChoice      |
+| 503 | cicero                                                               | declare-lab/cicero                        |                                                     |                 | cicero                                       | MultipleChoice      |
+| 504 | CREAK                                                                | amydeng2000/CREAK                         |                                                     |                 | creak                                        | Classification      |
+| 505 | mutual                                                               | metaeval/mutual                           |                                                     |                 | mutual                                       | MultipleChoice      |
+| 506 | NeQA                                                                 | inverse-scaling/NeQA                      |                                                     |                 | neqa                                         | MultipleChoice      |
+| 507 | quote-repetition                                                     | inverse-scaling/quote-repetition          |                                                     |                 | quote_repetition                             | MultipleChoice      |
+| 508 | redefine-math                                                        | inverse-scaling/redefine-math             |                                                     |                 | redefine_math                                | MultipleChoice      |
+| 509 | puzzte                                                               | metaeval/puzzte                           |                                                     |                 | puzzte                                       | Classification      |
+| 510 | implicatures                                                         | metaeval/implicatures                     |                                                     |                 | implicatures                                 | MultipleChoice      |
+| 511 | race/high                                                            | race                                      | high                                                |                 | race                                         | MultipleChoice      |
 | 512 | race/middle                                                          | race                                      | middle                                              |                 | race                                         | MultipleChoice      |
-| 513 | race/high                                                            | race                                      | high                                                |                 | race                                         | MultipleChoice      |
-| 514 | race-c                                                               | metaeval/race-c                           |                                                     |                 | race_c                                       | MultipleChoice      |
-| 515 | spartqa-yn                                                           | metaeval/spartqa-yn                       |                                                     |                 | spartqa_yn                                   | Classification      |
-| 516 | spartqa-mchoice                                                      | metaeval/spartqa-mchoice                  |                                                     |                 | spartqa_mc                                   | MultipleChoice      |
-| 517 | temporal-nli                                                         | metaeval/temporal-nli                     |                                                     |                 | temporal_nli                                 | Classification      |
-| 518 | riddle_sense                                                         | riddle_sense                              |                                                     |                 | riddle_sense                                 | MultipleChoice      |
-| 519 | clcd-english                                                         | metaeval/clcd-english                     |                                                     |                 | clcd                                         | Classification      |
-| 520 | twentyquestions                                                      | maximedb/twentyquestions                  |                                                     |                 | twentyquestions                              | Classification      |
-| 521 | reclor                                                               | metaeval/reclor                           |                                                     |                 | reclor                                       | MultipleChoice      |
-| 522 | counterfactually-augmented-imdb                                      | metaeval/counterfactually-augmented-imdb  |                                                     |                 | c_aug_imdb                                   | Classification      |
-| 523 | counterfactually-augmented-snli                                      | metaeval/counterfactually-augmented-snli  |                                                     |                 | c_aug_snli                                   | Classification      |
-| 524 | cnli                                                                 | metaeval/cnli                             |                                                     |                 | cnli                                         | Classification      |
-| 525 | boolq-natural-perturbations                                          | metaeval/boolq-natural-perturbations      |                                                     |                 | perturbed_boolq                              | Classification      |
-| 526 | acceptability-prediction                                             | metaeval/acceptability-prediction         |                                                     |                 | graded_acceptability                         | Classification      |
-| 527 | equate                                                               | metaeval/equate                           |                                                     |                 | equate                                       | Classification      |
-| 528 | ScienceQA_text_only                                                  | metaeval/ScienceQA_text_only              |                                                     |                 | science_qa                                   | MultipleChoice      |
-| 529 | ekar_english                                                         | Jiangjie/ekar_english                     |                                                     |                 | ekar                                         | MultipleChoice      |
-| 530 | implicit-hate-stg1                                                   | metaeval/implicit-hate-stg1               |                                                     |                 | implicit_hate                                | Classification      |
-| 531 | chaos-mnli-ambiguity                                                 | metaeval/chaos-mnli-ambiguity             |                                                     |                 | nli_unambiguity                              | Classification      |
-| 532 | headline_cause/en_simple                                             | IlyaGusev/headline_cause                  | en_simple                                           |                 | headline_cause                               | Classification      |
-| 533 | logiqa-2.0-nli                                                       | metaeval/logiqa-2.0-nli                   |                                                     |                 | logiqa_2                                     | Classification      |
-| 534 | oasst1_dense_flat/quality                                            | tasksource/oasst1_dense_flat              |                                                     | quality         | oasst1__quality                              | Classification      |
-| 535 | oasst1_dense_flat/toxicity                                           | tasksource/oasst1_dense_flat              |                                                     | toxicity        | oasst1__toxicity                             | Classification      |
-| 536 | oasst1_dense_flat/helpfulness                                        | tasksource/oasst1_dense_flat              |                                                     | helpfulness     | oasst1__helpfulness                          | Classification      |
-| 537 | PARARULE-Plus                                                        | qbao775/PARARULE-Plus                     |                                                     |                 | para_rules                                   | Classification      |
-| 538 | mindgames                                                            | sileod/mindgames                          |                                                     |                 | mindgames                                    | Classification      |
-| 539 | universal_dependencies/en_gum/deprel                                 | universal_dependencies                    | en_gum                                              | deprel          | udep__deprel                                 | TokenClassification |
-| 540 | universal_dependencies/en_partut/deprel                              | universal_dependencies                    | en_partut                                           | deprel          | udep__deprel                                 | TokenClassification |
-| 541 | universal_dependencies/en_lines/deprel                               | universal_dependencies                    | en_lines                                            | deprel          | udep__deprel                                 | TokenClassification |
-| 542 | universal_dependencies/en_ewt/deprel                                 | universal_dependencies                    | en_ewt                                              | deprel          | udep__deprel                                 | TokenClassification |
-| 543 | ambient                                                              | metaeval/ambient                          |                                                     |                 | ambient                                      | Classification      |
-| 544 | path-naturalness-prediction                                          | metaeval/path-naturalness-prediction      |                                                     |                 | path_naturalness                             | MultipleChoice      |
-| 545 | civil_comments/toxicity                                              | civil_comments                            |                                                     | toxicity        | civil_comments__toxicity                     | Classification      |
-| 546 | civil_comments/severe_toxicity                                       | civil_comments                            |                                                     | severe_toxicity | civil_comments__severe_toxicity              | Classification      |
-| 547 | civil_comments/obscene                                               | civil_comments                            |                                                     | obscene         | civil_comments__obscene                      | Classification      |
-| 548 | civil_comments/threat                                                | civil_comments                            |                                                     | threat          | civil_comments__threat                       | Classification      |
-| 549 | civil_comments/insult                                                | civil_comments                            |                                                     | insult          | civil_comments__insult                       | Classification      |
-| 550 | civil_comments/identity_attack                                       | civil_comments                            |                                                     | identity_attack | civil_comments__identity_attack              | Classification      |
-| 551 | civil_comments/sexual_explicit                                       | civil_comments                            |                                                     | sexual_explicit | civil_comments__sexual_explicit              | Classification      |
-| 552 | cloth                                                                | AndyChiang/cloth                          |                                                     |                 | cloth                                        | MultipleChoice      |
-| 553 | dgen                                                                 | AndyChiang/dgen                           |                                                     |                 | dgen                                         | MultipleChoice      |
-| 554 | oasst1_pairwise_rlhf_reward                                          | tasksource/oasst1_pairwise_rlhf_reward    |                                                     |                 | oasst_rlhf                                   | MultipleChoice      |
-| 555 | I2D2                                                                 | tasksource/I2D2                           |                                                     |                 | i2d2                                         | Classification      |
-| 556 | args_me                                                              | webis/args_me                             |                                                     |                 | arg_me                                       | Classification      |
-| 557 | Touche23-ValueEval                                                   | webis/Touche23-ValueEval                  |                                                     |                 | valueeval_stance                             | Classification      |
-| 558 | starcon                                                              | tasksource/starcon                        |                                                     |                 | starcon                                      | Classification      |
-| 559 | banking77                                                            | PolyAI/banking77                          |                                                     |                 | banking77                                    | Classification      |
-| 560 | ruletaker                                                            | tasksource/ruletaker                      |                                                     |                 | ruletaker                                    | Classification      |
-| 561 | lsat_qa/all                                                          | lighteval/lsat_qa                         | all                                                 |                 | lsat_qa                                      | MultipleChoice      |
-| 562 | ConTRoL-nli                                                          | tasksource/ConTRoL-nli                    |                                                     |                 | control                                      | Classification      |
-| 563 | tracie                                                               | tasksource/tracie                         |                                                     |                 | tracie                                       | Classification      |
-| 564 | sherliic                                                             | tasksource/sherliic                       |                                                     |                 | sherliic                                     | Classification      |
-| 565 | sen-making/1                                                         | tasksource/sen-making                     |                                                     | 1               | sen_making__1                                | MultipleChoice      |
-| 566 | sen-making/2                                                         | tasksource/sen-making                     |                                                     | 2               | sen_making__2                                | MultipleChoice      |
-| 567 | winowhy                                                              | tasksource/winowhy                        |                                                     |                 | winowhy                                      | Classification      |
-| 568 | mbib-base/cognitive-bias                                             | mediabiasgroup/mbib-base                  | cognitive-bias                                      |                 | mbib_cognitive_bias                          | Classification      |
-| 569 | mbib-base/fake-news                                                  | mediabiasgroup/mbib-base                  | fake-news                                           |                 | mbib_fake_news                               | Classification      |
-| 570 | mbib-base/gender-bias                                                | mediabiasgroup/mbib-base                  | gender-bias                                         |                 | mbib_gender_bias                             | Classification      |
-| 571 | mbib-base/hate-speech                                                | mediabiasgroup/mbib-base                  | hate-speech                                         |                 | mbib_hate_speech                             | Classification      |
-| 572 | mbib-base/linguistic-bias                                            | mediabiasgroup/mbib-base                  | linguistic-bias                                     |                 | mbib_linguistic_bias                         | Classification      |
-| 573 | mbib-base/political-bias                                             | mediabiasgroup/mbib-base                  | political-bias                                      |                 | mbib_political_bias                          | Classification      |
-| 574 | mbib-base/racial-bias                                                | mediabiasgroup/mbib-base                  | racial-bias                                         |                 | mbib_racial_bias                             | Classification      |
-| 575 | mbib-base/text-level-bias                                            | mediabiasgroup/mbib-base                  | text-level-bias                                     |                 | mbib_text_level_bias                         | Classification      |
-| 576 | robustLR                                                             | tasksource/robustLR                       |                                                     |                 | robustLR                                     | Classification      |
-| 577 | v1/gen_train234_test2to10                                            | CLUTRR/v1                                 | gen_train234_test2to10                              |                 | cluttr                                       | Classification      |
-| 578 | logical-fallacy                                                      | tasksource/logical-fallacy                |                                                     |                 | logical_fallacy                              | Classification      |
-| 579 | parade                                                               | tasksource/parade                         |                                                     |                 | parade                                       | Classification      |
-| 580 | cladder                                                              | tasksource/cladder                        |                                                     |                 | cladder                                      | Classification      |
-| 581 | subjectivity                                                         | tasksource/subjectivity                   |                                                     |                 | subjectivity                                 | Classification      |
-| 582 | MOH                                                                  | tasksource/MOH                            |                                                     |                 | moh                                          | Classification      |
-| 583 | VUAC                                                                 | tasksource/VUAC                           |                                                     |                 | vuac                                         | Classification      |
-| 584 | TroFi                                                                | tasksource/TroFi                          |                                                     |                 | trofi                                        | Classification      |
-| 585 | sharc_modified/mod                                                   | sharc_modified                            | mod                                                 |                 | sharc_classification                         | Classification      |
-| 586 | conceptrules_v2                                                      | tasksource/conceptrules_v2                |                                                     |                 | conceptrules_v2                              | Classification      |
-| 587 | disrpt/eng.dep.scidtb                                                | metaeval/disrpt                           | eng.dep.scidtb                                      |                 | scidtb                                       | Classification      |
-| 588 | conll2000                                                            | conll2000                                 |                                                     |                 | chunking                                     | TokenClassification |
-| 589 | few-nerd/supervised                                                  | DFKI-SLT/few-nerd                         | supervised                                          |                 | few_nerd                                     | TokenClassification |
-| 590 | finer-139                                                            | nlpaueb/finer-139                         |                                                     |                 | finer                                        | TokenClassification |
-| 591 | zero-shot-label-nli                                                  | tasksource/zero-shot-label-nli            |                                                     |                 | label_nli                                    | Classification      |
-| 592 | com2sense                                                            | tasksource/com2sense                      |                                                     |                 | com2sense                                    | Classification      |
-| 593 | scone                                                                | tasksource/scone                          |                                                     |                 | scone                                        | Classification      |
-| 594 | winodict                                                             | tasksource/winodict                       |                                                     |                 | winodict                                     | MultipleChoice      |
-| 595 | fool-me-twice                                                        | tasksource/fool-me-twice                  |                                                     |                 | fool_me_twice                                | Classification      |
-| 596 | monli                                                                | tasksource/monli                          |                                                     |                 | monli                                        | Classification      |
-| 597 | corr2cause                                                           | tasksource/corr2cause                     |                                                     |                 | causality                                    | Classification      |
-| 598 | lsat_qa/all                                                          | lighteval/lsat_qa                         | all                                                 |                 | lsat                                         | MultipleChoice      |
-| 599 | apt                                                                  | tasksource/apt                            |                                                     |                 | apt                                          | Classification      |
-| 600 | twitter-financial-news-sentiment                                     | zeroshot/twitter-financial-news-sentiment |                                                     |                 | financial_sentiment                          | Classification      |
-| 601 | icl-symbol-tuning-instruct                                           | tasksource/icl-symbol-tuning-instruct     |                                                     |                 | icl                                          | Classification      |
-| 602 | SpaceNLI                                                             | tasksource/SpaceNLI                       |                                                     |                 | space_nli                                    | Classification      |
-| 603 | propsegment/nli                                                      | sihaochen/propsegment                     | nli                                                 |                 | propsegment                                  | Classification      |
-| 604 | HatemojiBuild                                                        | HannahRoseKirk/HatemojiBuild              |                                                     |                 | hatemoji                                     | Classification      |
-| 605 | regset                                                               | tasksource/regset                         |                                                     |                 | regset                                       | Classification      |
-| 606 | esci                                                                 | tasksource/esci                           |                                                     |                 | esci                                         | Classification      |
-| 607 | chatbot_arena_conversations                                          | lmsys/chatbot_arena_conversations         |                                                     |                 | chatbot_arena                                | MultipleChoice      |
-| 608 | dnd_style_intents                                                    | neurae/dnd_style_intents                  |                                                     |                 | dnd_intent                                   | Classification      |
-| 609 | FLD.v2                                                               | hitachi-nlp/FLD.v2                        |                                                     |                 | fld                                          | Classification      |
-| 610 | SDOH-NLI                                                             | tasksource/SDOH-NLI                       |                                                     |                 | sdoh_nli                                     | Classification      |
-| 611 | scifact_entailment                                                   | allenai/scifact_entailment                |                                                     |                 | scifact_entailment                           | Classification      |
-| 612 | feasibilityQA                                                        | tasksource/feasibilityQA                  |                                                     |                 | feasibilityQA                                | Classification      |
-| 613 | simple_pair                                                          | tasksource/simple_pair                    |                                                     |                 | simple_pair                                  | Classification      |
-| 614 | AdjectiveScaleProbe-nli                                              | tasksource/AdjectiveScaleProbe-nli        |                                                     |                 | adjective_scale_probe                        | Classification      |
-| 615 | resnli                                                               | tasksource/resnli                         |                                                     |                 | repectively_nli                              | Classification      |
+| 513 | race-c                                                               | metaeval/race-c                           |                                                     |                 | race_c                                       | MultipleChoice      |
+| 514 | spartqa-yn                                                           | metaeval/spartqa-yn                       |                                                     |                 | spartqa_yn                                   | Classification      |
+| 515 | spartqa-mchoice                                                      | metaeval/spartqa-mchoice                  |                                                     |                 | spartqa_mc                                   | MultipleChoice      |
+| 516 | temporal-nli                                                         | metaeval/temporal-nli                     |                                                     |                 | temporal_nli                                 | Classification      |
+| 517 | riddle_sense                                                         | riddle_sense                              |                                                     |                 | riddle_sense                                 | MultipleChoice      |
+| 518 | clcd-english                                                         | metaeval/clcd-english                     |                                                     |                 | clcd                                         | Classification      |
+| 519 | twentyquestions                                                      | maximedb/twentyquestions                  |                                                     |                 | twentyquestions                              | Classification      |
+| 520 | reclor                                                               | metaeval/reclor                           |                                                     |                 | reclor                                       | MultipleChoice      |
+| 521 | counterfactually-augmented-imdb                                      | metaeval/counterfactually-augmented-imdb  |                                                     |                 | c_aug_imdb                                   | Classification      |
+| 522 | counterfactually-augmented-snli                                      | metaeval/counterfactually-augmented-snli  |                                                     |                 | c_aug_snli                                   | Classification      |
+| 523 | cnli                                                                 | metaeval/cnli                             |                                                     |                 | cnli                                         | Classification      |
+| 524 | boolq-natural-perturbations                                          | metaeval/boolq-natural-perturbations      |                                                     |                 | perturbed_boolq                              | Classification      |
+| 525 | acceptability-prediction                                             | metaeval/acceptability-prediction         |                                                     |                 | graded_acceptability                         | Classification      |
+| 526 | equate                                                               | metaeval/equate                           |                                                     |                 | equate                                       | Classification      |
+| 527 | ScienceQA_text_only                                                  | metaeval/ScienceQA_text_only              |                                                     |                 | science_qa                                   | MultipleChoice      |
+| 528 | ekar_english                                                         | Jiangjie/ekar_english                     |                                                     |                 | ekar                                         | MultipleChoice      |
+| 529 | implicit-hate-stg1                                                   | metaeval/implicit-hate-stg1               |                                                     |                 | implicit_hate                                | Classification      |
+| 530 | chaos-mnli-ambiguity                                                 | metaeval/chaos-mnli-ambiguity             |                                                     |                 | nli_unambiguity                              | Classification      |
+| 531 | headline_cause/en_simple                                             | IlyaGusev/headline_cause                  | en_simple                                           |                 | headline_cause                               | Classification      |
+| 532 | logiqa-2.0-nli                                                       | metaeval/logiqa-2.0-nli                   |                                                     |                 | logiqa_2                                     | Classification      |
+| 533 | oasst1_dense_flat/quality                                            | tasksource/oasst1_dense_flat              |                                                     | quality         | oasst1__quality                              | Classification      |
+| 534 | oasst1_dense_flat/toxicity                                           | tasksource/oasst1_dense_flat              |                                                     | toxicity        | oasst1__toxicity                             | Classification      |
+| 535 | oasst1_dense_flat/helpfulness                                        | tasksource/oasst1_dense_flat              |                                                     | helpfulness     | oasst1__helpfulness                          | Classification      |
+| 536 | PARARULE-Plus                                                        | qbao775/PARARULE-Plus                     |                                                     |                 | para_rules                                   | Classification      |
+| 537 | mindgames                                                            | sileod/mindgames                          |                                                     |                 | mindgames                                    | Classification      |
+| 538 | universal_dependencies/en_lines/deprel                               | universal_dependencies                    | en_lines                                            | deprel          | udep__deprel                                 | TokenClassification |
+| 539 | universal_dependencies/en_partut/deprel                              | universal_dependencies                    | en_partut                                           | deprel          | udep__deprel                                 | TokenClassification |
+| 540 | universal_dependencies/en_ewt/deprel                                 | universal_dependencies                    | en_ewt                                              | deprel          | udep__deprel                                 | TokenClassification |
+| 541 | universal_dependencies/en_gum/deprel                                 | universal_dependencies                    | en_gum                                              | deprel          | udep__deprel                                 | TokenClassification |
+| 542 | ambient                                                              | metaeval/ambient                          |                                                     |                 | ambient                                      | Classification      |
+| 543 | path-naturalness-prediction                                          | metaeval/path-naturalness-prediction      |                                                     |                 | path_naturalness                             | MultipleChoice      |
+| 544 | civil_comments/toxicity                                              | civil_comments                            |                                                     | toxicity        | civil_comments__toxicity                     | Classification      |
+| 545 | civil_comments/severe_toxicity                                       | civil_comments                            |                                                     | severe_toxicity | civil_comments__severe_toxicity              | Classification      |
+| 546 | civil_comments/obscene                                               | civil_comments                            |                                                     | obscene         | civil_comments__obscene                      | Classification      |
+| 547 | civil_comments/threat                                                | civil_comments                            |                                                     | threat          | civil_comments__threat                       | Classification      |
+| 548 | civil_comments/insult                                                | civil_comments                            |                                                     | insult          | civil_comments__insult                       | Classification      |
+| 549 | civil_comments/identity_attack                                       | civil_comments                            |                                                     | identity_attack | civil_comments__identity_attack              | Classification      |
+| 550 | civil_comments/sexual_explicit                                       | civil_comments                            |                                                     | sexual_explicit | civil_comments__sexual_explicit              | Classification      |
+| 551 | cloth                                                                | AndyChiang/cloth                          |                                                     |                 | cloth                                        | MultipleChoice      |
+| 552 | dgen                                                                 | AndyChiang/dgen                           |                                                     |                 | dgen                                         | MultipleChoice      |
+| 553 | oasst1_pairwise_rlhf_reward                                          | tasksource/oasst1_pairwise_rlhf_reward    |                                                     |                 | oasst_rlhf                                   | MultipleChoice      |
+| 554 | I2D2                                                                 | tasksource/I2D2                           |                                                     |                 | i2d2                                         | Classification      |
+| 555 | args_me                                                              | webis/args_me                             |                                                     |                 | arg_me                                       | Classification      |
+| 556 | Touche23-ValueEval                                                   | webis/Touche23-ValueEval                  |                                                     |                 | valueeval_stance                             | Classification      |
+| 557 | starcon                                                              | tasksource/starcon                        |                                                     |                 | starcon                                      | Classification      |
+| 558 | banking77                                                            | PolyAI/banking77                          |                                                     |                 | banking77                                    | Classification      |
+| 559 | ruletaker                                                            | tasksource/ruletaker                      |                                                     |                 | ruletaker                                    | Classification      |
+| 560 | lsat_qa/all                                                          | lighteval/lsat_qa                         | all                                                 |                 | lsat_qa                                      | MultipleChoice      |
+| 561 | ConTRoL-nli                                                          | tasksource/ConTRoL-nli                    |                                                     |                 | control                                      | Classification      |
+| 562 | tracie                                                               | tasksource/tracie                         |                                                     |                 | tracie                                       | Classification      |
+| 563 | sherliic                                                             | tasksource/sherliic                       |                                                     |                 | sherliic                                     | Classification      |
+| 564 | sen-making/1                                                         | tasksource/sen-making                     |                                                     | 1               | sen_making__1                                | MultipleChoice      |
+| 565 | sen-making/2                                                         | tasksource/sen-making                     |                                                     | 2               | sen_making__2                                | MultipleChoice      |
+| 566 | winowhy                                                              | tasksource/winowhy                        |                                                     |                 | winowhy                                      | Classification      |
+| 567 | mbib-base/cognitive-bias                                             | mediabiasgroup/mbib-base                  | cognitive-bias                                      |                 | mbib_cognitive_bias                          | Classification      |
+| 568 | mbib-base/fake-news                                                  | mediabiasgroup/mbib-base                  | fake-news                                           |                 | mbib_fake_news                               | Classification      |
+| 569 | mbib-base/gender-bias                                                | mediabiasgroup/mbib-base                  | gender-bias                                         |                 | mbib_gender_bias                             | Classification      |
+| 570 | mbib-base/hate-speech                                                | mediabiasgroup/mbib-base                  | hate-speech                                         |                 | mbib_hate_speech                             | Classification      |
+| 571 | mbib-base/linguistic-bias                                            | mediabiasgroup/mbib-base                  | linguistic-bias                                     |                 | mbib_linguistic_bias                         | Classification      |
+| 572 | mbib-base/political-bias                                             | mediabiasgroup/mbib-base                  | political-bias                                      |                 | mbib_political_bias                          | Classification      |
+| 573 | mbib-base/racial-bias                                                | mediabiasgroup/mbib-base                  | racial-bias                                         |                 | mbib_racial_bias                             | Classification      |
+| 574 | mbib-base/text-level-bias                                            | mediabiasgroup/mbib-base                  | text-level-bias                                     |                 | mbib_text_level_bias                         | Classification      |
+| 575 | robustLR                                                             | tasksource/robustLR                       |                                                     |                 | robustLR                                     | Classification      |
+| 576 | v1/gen_train234_test2to10                                            | CLUTRR/v1                                 | gen_train234_test2to10                              |                 | cluttr                                       | Classification      |
+| 577 | logical-fallacy                                                      | tasksource/logical-fallacy                |                                                     |                 | logical_fallacy                              | Classification      |
+| 578 | parade                                                               | tasksource/parade                         |                                                     |                 | parade                                       | Classification      |
+| 579 | cladder                                                              | tasksource/cladder                        |                                                     |                 | cladder                                      | Classification      |
+| 580 | subjectivity                                                         | tasksource/subjectivity                   |                                                     |                 | subjectivity                                 | Classification      |
+| 581 | MOH                                                                  | tasksource/MOH                            |                                                     |                 | moh                                          | Classification      |
+| 582 | VUAC                                                                 | tasksource/VUAC                           |                                                     |                 | vuac                                         | Classification      |
+| 583 | TroFi                                                                | tasksource/TroFi                          |                                                     |                 | trofi                                        | Classification      |
+| 584 | sharc_modified/mod                                                   | sharc_modified                            | mod                                                 |                 | sharc_classification                         | Classification      |
+| 585 | conceptrules_v2                                                      | tasksource/conceptrules_v2                |                                                     |                 | conceptrules_v2                              | Classification      |
+| 586 | disrpt/eng.dep.scidtb.rels                                           | metaeval/disrpt                           | eng.dep.scidtb.rels                                 |                 | scidtb                                       | Classification      |
+| 587 | conll2000                                                            | conll2000                                 |                                                     |                 | chunking                                     | TokenClassification |
+| 588 | few-nerd/supervised                                                  | DFKI-SLT/few-nerd                         | supervised                                          |                 | few_nerd                                     | TokenClassification |
+| 589 | finer-139                                                            | nlpaueb/finer-139                         |                                                     |                 | finer                                        | TokenClassification |
+| 590 | zero-shot-label-nli                                                  | tasksource/zero-shot-label-nli            |                                                     |                 | label_nli                                    | Classification      |
+| 591 | com2sense                                                            | tasksource/com2sense                      |                                                     |                 | com2sense                                    | Classification      |
+| 592 | scone                                                                | tasksource/scone                          |                                                     |                 | scone                                        | Classification      |
+| 593 | winodict                                                             | tasksource/winodict                       |                                                     |                 | winodict                                     | MultipleChoice      |
+| 594 | fool-me-twice                                                        | tasksource/fool-me-twice                  |                                                     |                 | fool_me_twice                                | Classification      |
+| 595 | monli                                                                | tasksource/monli                          |                                                     |                 | monli                                        | Classification      |
+| 596 | corr2cause                                                           | tasksource/corr2cause                     |                                                     |                 | causality                                    | Classification      |
+| 597 | lsat_qa/all                                                          | lighteval/lsat_qa                         | all                                                 |                 | lsat                                         | MultipleChoice      |
+| 598 | apt                                                                  | tasksource/apt                            |                                                     |                 | apt                                          | Classification      |
+| 599 | twitter-financial-news-sentiment                                     | zeroshot/twitter-financial-news-sentiment |                                                     |                 | financial_sentiment                          | Classification      |
+| 600 | icl-symbol-tuning-instruct                                           | tasksource/icl-symbol-tuning-instruct     |                                                     |                 | icl                                          | Classification      |
+| 601 | SpaceNLI                                                             | tasksource/SpaceNLI                       |                                                     |                 | space_nli                                    | Classification      |
+| 602 | propsegment/nli                                                      | sihaochen/propsegment                     | nli                                                 |                 | propsegment                                  | Classification      |
+| 603 | HatemojiBuild                                                        | HannahRoseKirk/HatemojiBuild              |                                                     |                 | hatemoji                                     | Classification      |
+| 604 | regset                                                               | tasksource/regset                         |                                                     |                 | regset                                       | Classification      |
+| 605 | esci                                                                 | tasksource/esci                           |                                                     |                 | esci                                         | Classification      |
+| 606 | chatbot_arena_conversations                                          | lmsys/chatbot_arena_conversations         |                                                     |                 | chatbot_arena                                | MultipleChoice      |
+| 607 | dnd_style_intents                                                    | neurae/dnd_style_intents                  |                                                     |                 | dnd_intent                                   | Classification      |
+| 608 | FLD.v2                                                               | hitachi-nlp/FLD.v2                        |                                                     |                 | fld                                          | Classification      |
+| 609 | SDOH-NLI                                                             | tasksource/SDOH-NLI                       |                                                     |                 | sdoh_nli                                     | Classification      |
+| 610 | scifact_entailment                                                   | allenai/scifact_entailment                |                                                     |                 | scifact_entailment                           | Classification      |
+| 611 | feasibilityQA                                                        | tasksource/feasibilityQA                  |                                                     |                 | feasibilityQA                                | Classification      |
+| 612 | simple_pair                                                          | tasksource/simple_pair                    |                                                     |                 | simple_pair                                  | Classification      |
+| 613 | AdjectiveScaleProbe-nli                                              | tasksource/AdjectiveScaleProbe-nli        |                                                     |                 | adjective_scale_probe                        | Classification      |
+| 614 | resnli                                                               | tasksource/resnli                         |                                                     |                 | repectively_nli                              | Classification      |
+| 615 | SpaRTUN                                                              | tasksource/SpaRTUN                        |                                                     |                 | spartun                                      | MultipleChoice      |
+| 616 | ReSQ                                                                 | tasksource/ReSQ                           |                                                     |                 | resq                                         | MultipleChoice      |
+| 617 | semantic_fragments_nli                                               | tasksource/semantic_fragments_nli         |                                                     |                 | semantic_fragments_nli                       | Classification      |
+| 618 | dataset_train_nli                                                    | MoritzLaurer/dataset_train_nli            |                                                     |                 | moritz_zs_nli                                | Classification      |