diff --git a/src/tasksource/.ipynb_checkpoints/access-checkpoint.py b/src/tasksource/.ipynb_checkpoints/access-checkpoint.py
deleted file mode 100644
index bb49a19..0000000
--- a/src/tasksource/.ipynb_checkpoints/access-checkpoint.py
+++ /dev/null
@@ -1,108 +0,0 @@
-from .preprocess import Preprocessing
-import re
-import pandas as pd
-from . import tasks, recast
-from .metadata import dataset_rank
-from datasets import load_dataset
-import funcy as fc
-import os
-import copy
-from sorcery import dict_of
-from functools import cache
-import random
-
-
-class lazy_mtasks:
-    def __getattr__(self, name):
-        from . import mtasks
-        return getattr(mtasks, name)
-
-    def __dir__(self):
-        from . import mtasks
-        return dir(mtasks)
-lmtasks=lazy_mtasks()
-
-def parse_var_name(s):
-    config_name,task_name = None,None
-    if '__' in s and '___' not in s: # dataset__task
-        dataset_name, task_name = s.split('__')
-    elif '__' not in s.replace('___','') and '___' in s: #dataset___config
-        dataset_name, config_name = s.split('___')
-    elif '___' in s and '__' in s.split('___')[1]: #dataset___config__task
-        dataset_name, config_task=s.split('___')
-        config_name,task_name = config_task.split('__')
-    else: # dataset
-        dataset_name = s
-    return dataset_name,config_name,task_name
-
-def pretty_name(x):
-    dn = x.dataset_name.split("/")[-1]
-    cn = x.config_name if x.config_name else ""
-    tn = x.task_name if x.task_name else ""
-    return f"{dn}/{cn}/{tn}".replace('//','/').rstrip('/')
-
-@cache
-def list_tasks(tasks_path=f'{os.path.dirname(__file__)}/tasks.py',multilingual=False,instruct=False, excluded=[]):
-    if multilingual:
-        tasks_path=tasks_path.replace('/tasks.py','/mtasks.py')
-    task_order = open(tasks_path).readlines()
-    task_order = [x.split('=')[0].rstrip() for x in task_order if '=' in x]
-    task_order = [x for x in task_order if x.isidentifier()]
-    task_order = fc.flip(dict(enumerate(task_order)))
-
-    l = []
-    _tasks = (lmtasks if multilingual else tasks)
-
-    for key in dir(_tasks):
-        if key not in task_order:
-            continue
-        value=getattr(_tasks, key)
-        if isinstance(value,Preprocessing):
-            dataset_name, config_name, task_name = parse_var_name(key)
-            dataset_name = (value.dataset_name if value.dataset_name else dataset_name)
-            config_name = (value.config_name if value.config_name else config_name)
-            hasattr(value,key)
-            l+=[{'dataset_name': dataset_name,
-                 'config_name' : config_name,
-                 'task_name': task_name,
-                 'preprocessing_name': key,
-                 'task_type': value.__class__.__name__,'mapping': value,
-                 'rank':task_order.get(key,None)}]
-    df=pd.DataFrame(l).explode('config_name')
-    df = df.sort_values('rank').reset_index(drop=True)
-    df['id'] = df.apply(lambda x: pretty_name(x), axis=1)
-    df.insert(0, 'id', df.pop('id'))
-    del df['rank']
-    if instruct:
-        df=df[df.id.map(lambda x: not any(a in x for a in recast.improper_labels))]
-        df=df[df.id.map(lambda x: not any(x in a for a in excluded))]
-    return df
-
-#task_df =list_tasks()
-#mtask_df =list_tasks(multilingual=True)
-
-def dict_to_query(d=dict(), **kwargs):
-    d={**d,**kwargs}
-    return '&'.join([f'`{k}`=="{v}"' for k,v in d.items()])
-
-def load_preprocessing(tasks=tasks, **kwargs):
-    _tasks_df = list_tasks(multilingual=tasks==lmtasks)
-    y = _tasks_df.copy().query(dict_to_query(**kwargs)).iloc[0]
-    preprocessing= copy.copy(getattr(tasks, y.preprocessing_name))
-    for c in 'dataset_name','config_name':
-        if not isinstance(getattr(preprocessing,c), str):
-            setattr(preprocessing,c,getattr(y,c))
-    return preprocessing
-
-def load_task(id=None, dataset_name=None,config_name=None,task_name=None,preprocessing_name=None,
-    max_rows=None, max_rows_eval=None, multilingual=False, instruct=False, seed=0, **load_dataset_kwargs):
-    query = dict_of(id, dataset_name, config_name, task_name,preprocessing_name)
-    query = {k:v for k,v in query.items() if v}
-    _tasks = (lmtasks if multilingual else tasks)
-    preprocessing = load_preprocessing(_tasks, **query)
-    dataset = load_dataset(preprocessing.dataset_name, preprocessing.config_name, **load_dataset_kwargs)
-    dataset= preprocessing(dataset,max_rows, max_rows_eval)
-    dataset.task_type = preprocessing.__class__.__name__
-    if instruct:
-        dataset=recast.recast_instruct(dataset)
-    return dataset
\ No newline at end of file
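
Note: both files removed here are stale Jupyter .ipynb_checkpoints copies; the canonical modules presumably remain at src/tasksource/access.py and src/tasksource/recast.py. For context, access.py backs tasksource's public entry points. A minimal usage sketch, assuming the package re-exports list_tasks and load_task as in the project README ('glue/mnli' is an illustrative task id; valid ids come from list_tasks()):

    from tasksource import list_tasks, load_task

    df = list_tasks()                           # DataFrame with one row per task; 'id' is the first column
    ds = load_task('glue/mnli', max_rows=1000)  # resolves the id, loads the dataset, applies its Preprocessing
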
diff --git a/src/tasksource/.ipynb_checkpoints/recast-checkpoint.py b/src/tasksource/.ipynb_checkpoints/recast-checkpoint.py
deleted file mode 100644
index f7e0778..0000000
--- a/src/tasksource/.ipynb_checkpoints/recast-checkpoint.py
+++ /dev/null
@@ -1,112 +0,0 @@
-import random
-from datasets import DatasetDict, Dataset
-from sorcery import dict_of
-import string
-
-improper_labels =['recast/recast_kg_relations','linguisticprobing',"lex_glue/scotus","pragmeval/squinky","pragmeval/emobank",'pragmeval/persuasiveness']
-improper_labels += ['glue/stsb', 'sick/relatedness', 'joci', 'utilitarianism', 'amazon_counterfactual/en', 'toxic_conversations', 'ethos/multilabel', 'lex_glue/eurlex', 'lex_glue/unfair_tos', 'app_reviews', 'humicroedit/subtask-1', 'stackoverflow-questions', 'go_emotions/simplified', 'google_wellformed_query', 'has_part', 'blog_authorship_corpus/age', 'promptCoherence', 'Sarcasm_News_Headline', 'auditor_review/demo-org--auditor_review', 'Dynasent_Disagreement', 'Politeness_Disagreement', 'SBIC_Disagreement', 'SChem_Disagreement', 'Dilemmas_Disagreement', 'sts-companion', 'acceptability-prediction', 'chaos-mnli-ambiguity', 'headline_cause/en_simple', 'oasst1_dense_flat', 'civil_comments']
-improper_labels += ['stsb_multi_mt','MLMA_hate_speech','icl-symbol-tuning-instruct']
-
-def render_options(options):
-    options = [f'"{x}"' for x in options]
-    return f"{', '.join(options[:-1])} or {options[-1]}"
-
-def render_classification(text,options,answer):
-    example = 'A→B' if text.startswith('A:') else 'the following'
-    inputs = f'With no explanation, label {example} with either {render_options(options)}.\n{text}'
-    targets = f"{answer}."
-    return dict_of(inputs,targets)
-
-def render_token_classification(tokens,options,labels):
-    prefix = f'With no explanation, label each line with {render_options(options)} preceded by ":".\n'
-    inputs = prefix+"\n".join(tokens)
-    targets = "\n".join([':'.join(x) for x in zip(tokens,labels)])
-    return dict_of(inputs,targets)
-
-def render_multiple_choice(prompt, options, labels):
-    inputs=(prompt+'\n' if prompt else '')
-    letters = string.ascii_uppercase[:len(options)]
-    inputs=f'With no explanation, chose the best option from {render_options(letters)}. {inputs}'
-    for letter, option in zip(letters, options):
-        inputs+=f'\n{letter}: {option}'
-    targets = f'{letters[labels]}.'
-    return dict_of(inputs, targets)
-
-def negative_sample_options(y, labels,N=4):
-    if len(labels)<N:
[...]
-0),
-    dataset_name="openai/webgpt_comparisons")
-
-synthetic_instruct = MultipleChoice('prompt', choices=['chosen', 'rejected'],
-    labels=constant(0), dataset_name="Dahoas/synthetic-instruct-gptj-pairwise")
-
-scruples = Classification("text",labels="binarized_label",dataset_name="metaeval/scruples")
-
-wouldyourather = MultipleChoice(constant('Most people would rather:'), choices=['option_a','option_b'],
-    labels= lambda x: int(x['votes_a']
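
The recast helpers in this second file rewrite labeled examples into "With no explanation, ..." instruction prompts. A sketch of the output format, reconstructed from the render_classification body that survives intact above (the example sentence is illustrative):

    from tasksource.recast import render_classification

    render_classification('It was great.', ['positive', 'negative'], 'positive')
    # returns:
    # {'inputs': 'With no explanation, label the following with either '
    #            '"positive" or "negative".\nIt was great.',
    #  'targets': 'positive.'}
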