diff --git a/src/tasksource/tasks.py b/src/tasksource/tasks.py index 432c20e..b066bec 100755 --- a/src/tasksource/tasks.py +++ b/src/tasksource/tasks.py @@ -190,7 +190,7 @@ def imppres_post_process(ds,prefix=''): 'inputs', choices_list='multiple_choice_targets', labels=lambda x:x['multiple_choice_scores'].index(1) if 1 in ['multiple_choice_scores'] else -1, - config_name=bigbench_discriminative_english - {"social_iqa"} # english multiple choice tasks, minus duplicates + config_name=bigbench_discriminative_english - {"social_i_qa","intersect_geometry"} # english multiple choice tasks, minus duplicates ) blimp_hard = MultipleChoice(inputs=constant(''), @@ -636,6 +636,11 @@ def split_choices(s): demo_org_auditor_review = Classification(sentence1="sentence", labels="label", splits=["train", None, "test"], dataset_name="demo-org/auditor_review", config_name="demo-org--auditor_review") +medmcqa = MultipleChoice("question", choices=regen('op[a-d]'),labels='cop') + +aqua_rat___tokenized = MultipleChoice("question",choices_list="options", + labels=lambda x:"ABCDE".index(x['correct'])) + ###END ################### END OF SUPPORT ###################### diff --git a/tasks.md b/tasks.md index 83a661b..e24dc89 100644 --- a/tasks.md +++ b/tasks.md @@ -1,483 +1,486 @@ -| | id | dataset_name | config_name | task_name | preprocessing_name | task_type | -|----:|:---------------------------------------------------------------------|:-----------------------------------------------|:----------------------------------------------------|:---------------|:-----------------------------------------------|:--------------------| -| 0 | anli/a1 | anli | | a1 | anli__a1 | Classification | -| 1 | anli/a2 | anli | | a2 | anli__a2 | Classification | -| 2 | anli/a3 | anli | | a3 | anli__a3 | Classification | -| 3 | babi_nli/size-reasoning | metaeval/babi_nli | size-reasoning | | babi_nli | Classification | -| 4 | babi_nli/two-arg-relations | metaeval/babi_nli | two-arg-relations | | babi_nli | Classification | -| 5 | babi_nli/two-supporting-facts | metaeval/babi_nli | two-supporting-facts | | babi_nli | Classification | -| 6 | babi_nli/counting | metaeval/babi_nli | counting | | babi_nli | Classification | -| 7 | babi_nli/conjunction | metaeval/babi_nli | conjunction | | babi_nli | Classification | -| 8 | babi_nli/indefinite-knowledge | metaeval/babi_nli | indefinite-knowledge | | babi_nli | Classification | -| 9 | babi_nli/basic-induction | metaeval/babi_nli | basic-induction | | babi_nli | Classification | -| 10 | babi_nli/simple-negation | metaeval/babi_nli | simple-negation | | babi_nli | Classification | -| 11 | babi_nli/lists-sets | metaeval/babi_nli | lists-sets | | babi_nli | Classification | -| 12 | babi_nli/three-arg-relations | metaeval/babi_nli | three-arg-relations | | babi_nli | Classification | -| 13 | babi_nli/positional-reasoning | metaeval/babi_nli | positional-reasoning | | babi_nli | Classification | -| 14 | babi_nli/three-supporting-facts | metaeval/babi_nli | three-supporting-facts | | babi_nli | Classification | -| 15 | babi_nli/time-reasoning | metaeval/babi_nli | time-reasoning | | babi_nli | Classification | -| 16 | babi_nli/basic-deduction | metaeval/babi_nli | basic-deduction | | babi_nli | Classification | -| 17 | babi_nli/basic-coreference | metaeval/babi_nli | basic-coreference | | babi_nli | Classification | -| 18 | babi_nli/compound-coreference | metaeval/babi_nli | compound-coreference | | babi_nli | Classification | -| 19 | babi_nli/single-supporting-fact | metaeval/babi_nli | single-supporting-fact | | babi_nli | Classification | -| 20 | babi_nli/yes-no-questions | metaeval/babi_nli | yes-no-questions | | babi_nli | Classification | -| 21 | babi_nli/path-finding | metaeval/babi_nli | path-finding | | babi_nli | Classification | -| 22 | lingnli | metaeval/lingnli | | | ling_nli | Classification | -| 23 | sick/label | sick | | label | sick__label | Classification | -| 24 | sick/relatedness | sick | | relatedness | sick__relatedness | Classification | -| 25 | sick/entailment_AB | sick | | entailment_AB | sick__entailment_AB | Classification | -| 26 | sick/entailment_BA | sick | | entailment_BA | sick__entailment_BA | Classification | -| 27 | snli | snli | | | snli | Classification | -| 28 | scitail/snli_format | scitail | snli_format | | scitail | Classification | -| 29 | hans | hans | | | hans | Classification | -| 30 | WANLI | alisawuffles/WANLI | | | wanli | Classification | -| 31 | recast/recast_megaveridicality | metaeval/recast | recast_megaveridicality | | recast | Classification | -| 32 | recast/recast_sentiment | metaeval/recast | recast_sentiment | | recast | Classification | -| 33 | recast/recast_ner | metaeval/recast | recast_ner | | recast | Classification | -| 34 | recast/recast_verbcorner | metaeval/recast | recast_verbcorner | | recast | Classification | -| 35 | recast/recast_verbnet | metaeval/recast | recast_verbnet | | recast | Classification | -| 36 | recast/recast_factuality | metaeval/recast | recast_factuality | | recast | Classification | -| 37 | recast/recast_puns | metaeval/recast | recast_puns | | recast | Classification | -| 38 | recast/recast_kg_relations | metaeval/recast | recast_kg_relations | | recast | Classification | -| 39 | probability_words_nli/reasoning_1hop | sileod/probability_words_nli | reasoning_1hop | | probability_words_nli | Classification | -| 40 | probability_words_nli/reasoning_2hop | sileod/probability_words_nli | reasoning_2hop | | probability_words_nli | Classification | -| 41 | probability_words_nli/usnli | sileod/probability_words_nli | usnli | | probability_words_nli | Classification | -| 42 | nan-nli/joey234--nan-nli | joey234/nan-nli | joey234--nan-nli | | nan_nli | Classification | -| 43 | nli_fever | pietrolesci/nli_fever | | | nli_fever | Classification | -| 44 | breaking_nli | pietrolesci/breaking_nli | | | breaking_nli | Classification | -| 45 | conj_nli | pietrolesci/conj_nli | | | conj_nli | Classification | -| 46 | fracas | pietrolesci/fracas | | | fracas | Classification | -| 47 | dialogue_nli | pietrolesci/dialogue_nli | | | dialogue_nli | Classification | -| 48 | mpe | pietrolesci/mpe | | | mpe_nli | Classification | -| 49 | dnc | pietrolesci/dnc | | | dnc_nli | Classification | -| 50 | gpt3_nli | pietrolesci/gpt3_nli | | | gpt3_nli | Classification | -| 51 | recast_white/fnplus | pietrolesci/recast_white | | fnplus | recast_white__fnplus | Classification | -| 52 | recast_white/sprl | pietrolesci/recast_white | | sprl | recast_white__sprl | Classification | -| 53 | recast_white/dpr | pietrolesci/recast_white | | dpr | recast_white__dpr | Classification | -| 54 | joci | pietrolesci/joci | | | joci | Classification | -| 55 | contrast_nli | martn-nguyen/contrast_nli | | | contrast_nli | Classification | -| 56 | robust_nli/IS_CS | pietrolesci/robust_nli | | IS_CS | robust_nli__IS_CS | Classification | -| 57 | robust_nli/LI_LI | pietrolesci/robust_nli | | LI_LI | robust_nli__LI_LI | Classification | -| 58 | robust_nli/ST_WO | pietrolesci/robust_nli | | ST_WO | robust_nli__ST_WO | Classification | -| 59 | robust_nli/PI_SP | pietrolesci/robust_nli | | PI_SP | robust_nli__PI_SP | Classification | -| 60 | robust_nli/PI_CD | pietrolesci/robust_nli | | PI_CD | robust_nli__PI_CD | Classification | -| 61 | robust_nli/ST_SE | pietrolesci/robust_nli | | ST_SE | robust_nli__ST_SE | Classification | -| 62 | robust_nli/ST_NE | pietrolesci/robust_nli | | ST_NE | robust_nli__ST_NE | Classification | -| 63 | robust_nli/ST_LM | pietrolesci/robust_nli | | ST_LM | robust_nli__ST_LM | Classification | -| 64 | robust_nli_is_sd | pietrolesci/robust_nli_is_sd | | | robust_nli_is_sd | Classification | -| 65 | robust_nli_li_ts | pietrolesci/robust_nli_li_ts | | | robust_nli_li_ts | Classification | -| 66 | gen_debiased_nli/snli_seq_z | pietrolesci/gen_debiased_nli | | snli_seq_z | gen_debiased_nli__snli_seq_z | Classification | -| 67 | gen_debiased_nli/snli_z_aug | pietrolesci/gen_debiased_nli | | snli_z_aug | gen_debiased_nli__snli_z_aug | Classification | -| 68 | gen_debiased_nli/snli_par_z | pietrolesci/gen_debiased_nli | | snli_par_z | gen_debiased_nli__snli_par_z | Classification | -| 69 | gen_debiased_nli/mnli_par_z | pietrolesci/gen_debiased_nli | | mnli_par_z | gen_debiased_nli__mnli_par_z | Classification | -| 70 | gen_debiased_nli/mnli_z_aug | pietrolesci/gen_debiased_nli | | mnli_z_aug | gen_debiased_nli__mnli_z_aug | Classification | -| 71 | gen_debiased_nli/mnli_seq_z | pietrolesci/gen_debiased_nli | | mnli_seq_z | gen_debiased_nli__mnli_seq_z | Classification | -| 72 | add_one_rte | pietrolesci/add_one_rte | | | add_one_rte | Classification | -| 73 | imppres/presupposition_both_presupposition/presupposition | metaeval/imppres | presupposition_both_presupposition | presupposition | imppres__presupposition | Classification | -| 74 | imppres/presupposition_cleft_existence/presupposition | metaeval/imppres | presupposition_cleft_existence | presupposition | imppres__presupposition | Classification | -| 75 | imppres/presupposition_all_n_presupposition/presupposition | metaeval/imppres | presupposition_all_n_presupposition | presupposition | imppres__presupposition | Classification | -| 76 | imppres/presupposition_possessed_definites_existence/presupposition | metaeval/imppres | presupposition_possessed_definites_existence | presupposition | imppres__presupposition | Classification | -| 77 | imppres/presupposition_cleft_uniqueness/presupposition | metaeval/imppres | presupposition_cleft_uniqueness | presupposition | imppres__presupposition | Classification | -| 78 | imppres/presupposition_question_presupposition/presupposition | metaeval/imppres | presupposition_question_presupposition | presupposition | imppres__presupposition | Classification | -| 79 | imppres/presupposition_possessed_definites_uniqueness/presupposition | metaeval/imppres | presupposition_possessed_definites_uniqueness | presupposition | imppres__presupposition | Classification | -| 80 | imppres/presupposition_change_of_state/presupposition | metaeval/imppres | presupposition_change_of_state | presupposition | imppres__presupposition | Classification | -| 81 | imppres/presupposition_only_presupposition/presupposition | metaeval/imppres | presupposition_only_presupposition | presupposition | imppres__presupposition | Classification | -| 82 | imppres/implicature_modals/prag | metaeval/imppres | implicature_modals | prag | imppres__prag | Classification | -| 83 | imppres/implicature_quantifiers/prag | metaeval/imppres | implicature_quantifiers | prag | imppres__prag | Classification | -| 84 | imppres/implicature_numerals_2_3/prag | metaeval/imppres | implicature_numerals_2_3 | prag | imppres__prag | Classification | -| 85 | imppres/implicature_connectives/prag | metaeval/imppres | implicature_connectives | prag | imppres__prag | Classification | -| 86 | imppres/implicature_gradable_adjective/prag | metaeval/imppres | implicature_gradable_adjective | prag | imppres__prag | Classification | -| 87 | imppres/implicature_gradable_verb/prag | metaeval/imppres | implicature_gradable_verb | prag | imppres__prag | Classification | -| 88 | imppres/implicature_numerals_10_100/prag | metaeval/imppres | implicature_numerals_10_100 | prag | imppres__prag | Classification | -| 89 | imppres/implicature_connectives/log | metaeval/imppres | implicature_connectives | log | imppres__log | Classification | -| 90 | imppres/implicature_gradable_verb/log | metaeval/imppres | implicature_gradable_verb | log | imppres__log | Classification | -| 91 | imppres/implicature_gradable_adjective/log | metaeval/imppres | implicature_gradable_adjective | log | imppres__log | Classification | -| 92 | imppres/implicature_numerals_2_3/log | metaeval/imppres | implicature_numerals_2_3 | log | imppres__log | Classification | -| 93 | imppres/implicature_numerals_10_100/log | metaeval/imppres | implicature_numerals_10_100 | log | imppres__log | Classification | -| 94 | imppres/implicature_modals/log | metaeval/imppres | implicature_modals | log | imppres__log | Classification | -| 95 | imppres/implicature_quantifiers/log | metaeval/imppres | implicature_quantifiers | log | imppres__log | Classification | -| 96 | glue_diagnostics/diagnostics | pietrolesci/glue_diagnostics | | diagnostics | glue__diagnostics | Classification | -| 97 | hlgd | hlgd | | | hlgd | Classification | -| 98 | paws/labeled_final | paws | labeled_final | | paws___labeled_final | Classification | -| 99 | paws/labeled_swap | paws | labeled_swap | | paws___labeled_swap | Classification | -| 100 | quora | quora | | | quora | Classification | -| 101 | medical_questions_pairs | medical_questions_pairs | | | medical_questions_pairs | Classification | -| 102 | conll2003/pos_tags | conll2003 | | pos_tags | conll2003__pos_tags | TokenClassification | -| 103 | conll2003/chunk_tags | conll2003 | | chunk_tags | conll2003__chunk_tags | TokenClassification | -| 104 | conll2003/ner_tags | conll2003 | | ner_tags | conll2003__ner_tags | TokenClassification | -| 105 | hh-rlhf | Anthropic/hh-rlhf | | | anthropic_rlhf | MultipleChoice | -| 106 | model-written-evals | Anthropic/model-written-evals | | | model_written_evals | MultipleChoice | -| 107 | truthful_qa/multiple_choice | truthful_qa | multiple_choice | | truthful_qa___multiple_choice | MultipleChoice | -| 108 | fig-qa | nightingal3/fig-qa | | | fig_qa | MultipleChoice | -| 109 | bigbench/metaphor_boolean | bigbench | metaphor_boolean | | bigbench | MultipleChoice | -| 110 | bigbench/odd_one_out | bigbench | odd_one_out | | bigbench | MultipleChoice | -| 111 | bigbench/mathematical_induction | bigbench | mathematical_induction | | bigbench | MultipleChoice | -| 112 | bigbench/mnist_ascii | bigbench | mnist_ascii | | bigbench | MultipleChoice | -| 113 | bigbench/snarks | bigbench | snarks | | bigbench | MultipleChoice | -| 114 | bigbench/anachronisms | bigbench | anachronisms | | bigbench | MultipleChoice | -| 115 | bigbench/key_value_maps | bigbench | key_value_maps | | bigbench | MultipleChoice | -| 116 | bigbench/movie_recommendation | bigbench | movie_recommendation | | bigbench | MultipleChoice | -| 117 | bigbench/color | bigbench | color | | bigbench | MultipleChoice | -| 118 | bigbench/sentence_ambiguity | bigbench | sentence_ambiguity | | bigbench | MultipleChoice | -| 119 | bigbench/abstract_narrative_understanding | bigbench | abstract_narrative_understanding | | bigbench | MultipleChoice | -| 120 | bigbench/figure_of_speech_detection | bigbench | figure_of_speech_detection | | bigbench | MultipleChoice | -| 121 | bigbench/date_understanding | bigbench | date_understanding | | bigbench | MultipleChoice | -| 122 | bigbench/similarities_abstraction | bigbench | similarities_abstraction | | bigbench | MultipleChoice | -| 123 | bigbench/hhh_alignment | bigbench | hhh_alignment | | bigbench | MultipleChoice | -| 124 | bigbench/salient_translation_error_detection | bigbench | salient_translation_error_detection | | bigbench | MultipleChoice | -| 125 | bigbench/play_dialog_same_or_different | bigbench | play_dialog_same_or_different | | bigbench | MultipleChoice | -| 126 | bigbench/crass_ai | bigbench | crass_ai | | bigbench | MultipleChoice | -| 127 | bigbench/metaphor_understanding | bigbench | metaphor_understanding | | bigbench | MultipleChoice | -| 128 | bigbench/ruin_names | bigbench | ruin_names | | bigbench | MultipleChoice | -| 129 | bigbench/penguins_in_a_table | bigbench | penguins_in_a_table | | bigbench | MultipleChoice | -| 130 | bigbench/identify_math_theorems | bigbench | identify_math_theorems | | bigbench | MultipleChoice | -| 131 | bigbench/question_selection | bigbench | question_selection | | bigbench | MultipleChoice | -| 132 | bigbench/general_knowledge | bigbench | general_knowledge | | bigbench | MultipleChoice | -| 133 | bigbench/logical_deduction | bigbench | logical_deduction | | bigbench | MultipleChoice | -| 134 | bigbench/sports_understanding | bigbench | sports_understanding | | bigbench | MultipleChoice | -| 135 | bigbench/physics | bigbench | physics | | bigbench | MultipleChoice | -| 136 | bigbench/analytic_entailment | bigbench | analytic_entailment | | bigbench | MultipleChoice | -| 137 | bigbench/cause_and_effect | bigbench | cause_and_effect | | bigbench | MultipleChoice | -| 138 | bigbench/international_phonetic_alphabet_nli | bigbench | international_phonetic_alphabet_nli | | bigbench | MultipleChoice | -| 139 | bigbench/physical_intuition | bigbench | physical_intuition | | bigbench | MultipleChoice | -| 140 | bigbench/phrase_relatedness | bigbench | phrase_relatedness | | bigbench | MultipleChoice | -| 141 | bigbench/presuppositions_as_nli | bigbench | presuppositions_as_nli | | bigbench | MultipleChoice | -| 142 | bigbench/irony_identification | bigbench | irony_identification | | bigbench | MultipleChoice | -| 143 | bigbench/geometric_shapes | bigbench | geometric_shapes | | bigbench | MultipleChoice | -| 144 | bigbench/riddle_sense | bigbench | riddle_sense | | bigbench | MultipleChoice | -| 145 | bigbench/novel_concepts | bigbench | novel_concepts | | bigbench | MultipleChoice | -| 146 | bigbench/implicatures | bigbench | implicatures | | bigbench | MultipleChoice | -| 147 | bigbench/logic_grid_puzzle | bigbench | logic_grid_puzzle | | bigbench | MultipleChoice | -| 148 | bigbench/discourse_marker_prediction | bigbench | discourse_marker_prediction | | bigbench | MultipleChoice | -| 149 | bigbench/navigate | bigbench | navigate | | bigbench | MultipleChoice | -| 150 | bigbench/movie_dialog_same_or_different | bigbench | movie_dialog_same_or_different | | bigbench | MultipleChoice | -| 151 | bigbench/misconceptions | bigbench | misconceptions | | bigbench | MultipleChoice | -| 152 | bigbench/logical_fallacy_detection | bigbench | logical_fallacy_detection | | bigbench | MultipleChoice | -| 153 | bigbench/arithmetic | bigbench | arithmetic | | bigbench | MultipleChoice | -| 154 | bigbench/cifar10_classification | bigbench | cifar10_classification | | bigbench | MultipleChoice | -| 155 | bigbench/contextual_parametric_knowledge_conflicts | bigbench | contextual_parametric_knowledge_conflicts | | bigbench | MultipleChoice | -| 156 | bigbench/bbq_lite_json | bigbench | bbq_lite_json | | bigbench | MultipleChoice | -| 157 | bigbench/analogical_similarity | bigbench | analogical_similarity | | bigbench | MultipleChoice | -| 158 | bigbench/emojis_emotion_prediction | bigbench | emojis_emotion_prediction | | bigbench | MultipleChoice | -| 159 | bigbench/disambiguation_qa | bigbench | disambiguation_qa | | bigbench | MultipleChoice | -| 160 | bigbench/vitaminc_fact_verification | bigbench | vitaminc_fact_verification | | bigbench | MultipleChoice | -| 161 | bigbench/nonsense_words_grammar | bigbench | nonsense_words_grammar | | bigbench | MultipleChoice | -| 162 | bigbench/known_unknowns | bigbench | known_unknowns | | bigbench | MultipleChoice | -| 163 | bigbench/implicit_relations | bigbench | implicit_relations | | bigbench | MultipleChoice | -| 164 | bigbench/logical_sequence | bigbench | logical_sequence | | bigbench | MultipleChoice | -| 165 | bigbench/goal_step_wikihow | bigbench | goal_step_wikihow | | bigbench | MultipleChoice | -| 166 | bigbench/suicide_risk | bigbench | suicide_risk | | bigbench | MultipleChoice | -| 167 | bigbench/empirical_judgments | bigbench | empirical_judgments | | bigbench | MultipleChoice | -| 168 | bigbench/formal_fallacies_syllogisms_negation | bigbench | formal_fallacies_syllogisms_negation | | bigbench | MultipleChoice | -| 169 | bigbench/unit_interpretation | bigbench | unit_interpretation | | bigbench | MultipleChoice | -| 170 | bigbench/dark_humor_detection | bigbench | dark_humor_detection | | bigbench | MultipleChoice | -| 171 | bigbench/logical_args | bigbench | logical_args | | bigbench | MultipleChoice | -| 172 | bigbench/authorship_verification | bigbench | authorship_verification | | bigbench | MultipleChoice | -| 173 | bigbench/english_proverbs | bigbench | english_proverbs | | bigbench | MultipleChoice | -| 174 | bigbench/causal_judgment | bigbench | causal_judgment | | bigbench | MultipleChoice | -| 175 | bigbench/code_line_description | bigbench | code_line_description | | bigbench | MultipleChoice | -| 176 | bigbench/dyck_languages | bigbench | dyck_languages | | bigbench | MultipleChoice | -| 177 | bigbench/crash_blossom | bigbench | crash_blossom | | bigbench | MultipleChoice | -| 178 | bigbench/temporal_sequences | bigbench | temporal_sequences | | bigbench | MultipleChoice | -| 179 | bigbench/conceptual_combinations | bigbench | conceptual_combinations | | bigbench | MultipleChoice | -| 180 | bigbench/simple_ethical_questions | bigbench | simple_ethical_questions | | bigbench | MultipleChoice | -| 181 | bigbench/real_or_fake_text | bigbench | real_or_fake_text | | bigbench | MultipleChoice | -| 182 | bigbench/strange_stories | bigbench | strange_stories | | bigbench | MultipleChoice | -| 183 | bigbench/symbol_interpretation | bigbench | symbol_interpretation | | bigbench | MultipleChoice | -| 184 | bigbench/hindu_knowledge | bigbench | hindu_knowledge | | bigbench | MultipleChoice | -| 185 | bigbench/human_organs_senses | bigbench | human_organs_senses | | bigbench | MultipleChoice | -| 186 | bigbench/moral_permissibility | bigbench | moral_permissibility | | bigbench | MultipleChoice | -| 187 | bigbench/identify_odd_metaphor | bigbench | identify_odd_metaphor | | bigbench | MultipleChoice | -| 188 | bigbench/intent_recognition | bigbench | intent_recognition | | bigbench | MultipleChoice | -| 189 | bigbench/reasoning_about_colored_objects | bigbench | reasoning_about_colored_objects | | bigbench | MultipleChoice | -| 190 | bigbench/understanding_fables | bigbench | understanding_fables | | bigbench | MultipleChoice | -| 191 | bigbench/gre_reading_comprehension | bigbench | gre_reading_comprehension | | bigbench | MultipleChoice | -| 192 | bigbench/emoji_movie | bigbench | emoji_movie | | bigbench | MultipleChoice | -| 193 | bigbench/social_support | bigbench | social_support | | bigbench | MultipleChoice | -| 194 | bigbench/winowhy | bigbench | winowhy | | bigbench | MultipleChoice | -| 195 | bigbench/checkmate_in_one | bigbench | checkmate_in_one | | bigbench | MultipleChoice | -| 196 | bigbench/timedial | bigbench | timedial | | bigbench | MultipleChoice | -| 197 | bigbench/undo_permutation | bigbench | undo_permutation | | bigbench | MultipleChoice | -| 198 | bigbench/strategyqa | bigbench | strategyqa | | bigbench | MultipleChoice | -| 199 | bigbench/elementary_math_qa | bigbench | elementary_math_qa | | bigbench | MultipleChoice | -| 200 | bigbench/evaluating_information_essentiality | bigbench | evaluating_information_essentiality | | bigbench | MultipleChoice | -| 201 | bigbench/tracking_shuffled_objects | bigbench | tracking_shuffled_objects | | bigbench | MultipleChoice | -| 202 | bigbench/entailed_polarity | bigbench | entailed_polarity | | bigbench | MultipleChoice | -| 203 | bigbench/intersect_geometry | bigbench | intersect_geometry | | bigbench | MultipleChoice | -| 204 | bigbench/fact_checker | bigbench | fact_checker | | bigbench | MultipleChoice | -| 205 | bigbench/fantasy_reasoning | bigbench | fantasy_reasoning | | bigbench | MultipleChoice | -| 206 | bigbench/cs_algorithms | bigbench | cs_algorithms | | bigbench | MultipleChoice | -| 207 | bigbench/epistemic_reasoning | bigbench | epistemic_reasoning | | bigbench | MultipleChoice | -| 208 | bigbench/hyperbaton | bigbench | hyperbaton | | bigbench | MultipleChoice | -| 209 | blimp/principle_A_reconstruction | blimp | principle_A_reconstruction | | blimp_hard | MultipleChoice | -| 210 | blimp/existential_there_quantifiers_2 | blimp | existential_there_quantifiers_2 | | blimp_hard | MultipleChoice | -| 211 | blimp/animate_subject_passive | blimp | animate_subject_passive | | blimp_hard | MultipleChoice | -| 212 | blimp/wh_questions_object_gap | blimp | wh_questions_object_gap | | blimp_hard | MultipleChoice | -| 213 | blimp/principle_A_domain_2 | blimp | principle_A_domain_2 | | blimp_hard | MultipleChoice | -| 214 | blimp/tough_vs_raising_1 | blimp | tough_vs_raising_1 | | blimp_hard | MultipleChoice | -| 215 | blimp/sentential_subject_island | blimp | sentential_subject_island | | blimp_hard | MultipleChoice | -| 216 | blimp/npi_present_1 | blimp | npi_present_1 | | blimp_hard | MultipleChoice | -| 217 | blimp/inchoative | blimp | inchoative | | blimp_hard | MultipleChoice | -| 218 | blimp/coordinate_structure_constraint_object_extraction | blimp | coordinate_structure_constraint_object_extraction | | blimp_hard | MultipleChoice | -| 219 | blimp/wh_vs_that_with_gap | blimp | wh_vs_that_with_gap | | blimp_hard | MultipleChoice | -| 220 | blimp/principle_A_c_command | blimp | principle_A_c_command | | blimp_hard | MultipleChoice | -| 221 | blimp/matrix_question_npi_licensor_present | blimp | matrix_question_npi_licensor_present | | blimp_hard | MultipleChoice | -| 222 | blimp/drop_argument | blimp | drop_argument | | blimp_hard | MultipleChoice | -| 223 | blimp/left_branch_island_echo_question | blimp | left_branch_island_echo_question | | blimp_hard | MultipleChoice | -| 224 | blimp/coordinate_structure_constraint_complex_left_branch | blimp | coordinate_structure_constraint_complex_left_branch | | blimp_hard | MultipleChoice | -| 225 | blimp/wh_vs_that_with_gap_long_distance | blimp | wh_vs_that_with_gap_long_distance | | blimp_hard | MultipleChoice | -| 226 | blimp/complex_NP_island | blimp | complex_NP_island | | blimp_hard | MultipleChoice | -| 227 | blimp/npi_present_2 | blimp | npi_present_2 | | blimp_hard | MultipleChoice | -| 228 | blimp/sentential_negation_npi_scope | blimp | sentential_negation_npi_scope | | blimp_hard | MultipleChoice | -| 229 | blimp/wh_questions_subject_gap_long_distance | blimp | wh_questions_subject_gap_long_distance | | blimp_hard | MultipleChoice | -| 230 | blimp/superlative_quantifiers_1 | blimp | superlative_quantifiers_1 | | blimp_hard | MultipleChoice | -| 231 | cos_e/v1.0 | cos_e | v1.0 | | cos_e | MultipleChoice | -| 232 | cosmos_qa | cosmos_qa | | | cosmos_qa | MultipleChoice | -| 233 | dream | dream | | | dream | MultipleChoice | -| 234 | openbookqa | openbookqa | | | openbookqa | MultipleChoice | -| 235 | qasc | qasc | | | qasc | MultipleChoice | -| 236 | quartz | quartz | | | quartz | MultipleChoice | -| 237 | quail | quail | | | quail | MultipleChoice | -| 238 | head_qa/en | head_qa | en | | head_qa___en | MultipleChoice | -| 239 | sciq | sciq | | | sciq | MultipleChoice | -| 240 | social_i_qa | social_i_qa | | | social_i_qa | MultipleChoice | -| 241 | wiki_hop | wiki_hop | | | wiki_hop | MultipleChoice | -| 242 | wiqa | wiqa | | | wiqa | MultipleChoice | -| 243 | piqa | piqa | | | piqa | MultipleChoice | -| 244 | hellaswag | hellaswag | | | hellaswag | MultipleChoice | -| 245 | super_glue/copa | super_glue | copa | | super_glue___copa | MultipleChoice | -| 246 | balanced-copa | pkavumba/balanced-copa | | | balanced_copa | MultipleChoice | -| 247 | art | art | | | art | MultipleChoice | -| 248 | hendrycks_test/professional_accounting | hendrycks_test | professional_accounting | | hendrycks_test | MultipleChoice | -| 249 | hendrycks_test/moral_disputes | hendrycks_test | moral_disputes | | hendrycks_test | MultipleChoice | -| 250 | hendrycks_test/moral_scenarios | hendrycks_test | moral_scenarios | | hendrycks_test | MultipleChoice | -| 251 | hendrycks_test/nutrition | hendrycks_test | nutrition | | hendrycks_test | MultipleChoice | -| 252 | hendrycks_test/philosophy | hendrycks_test | philosophy | | hendrycks_test | MultipleChoice | -| 253 | hendrycks_test/prehistory | hendrycks_test | prehistory | | hendrycks_test | MultipleChoice | -| 254 | hendrycks_test/professional_law | hendrycks_test | professional_law | | hendrycks_test | MultipleChoice | -| 255 | hendrycks_test/world_religions | hendrycks_test | world_religions | | hendrycks_test | MultipleChoice | -| 256 | hendrycks_test/professional_psychology | hendrycks_test | professional_psychology | | hendrycks_test | MultipleChoice | -| 257 | hendrycks_test/public_relations | hendrycks_test | public_relations | | hendrycks_test | MultipleChoice | -| 258 | hendrycks_test/security_studies | hendrycks_test | security_studies | | hendrycks_test | MultipleChoice | -| 259 | hendrycks_test/sociology | hendrycks_test | sociology | | hendrycks_test | MultipleChoice | -| 260 | hendrycks_test/us_foreign_policy | hendrycks_test | us_foreign_policy | | hendrycks_test | MultipleChoice | -| 261 | hendrycks_test/virology | hendrycks_test | virology | | hendrycks_test | MultipleChoice | -| 262 | hendrycks_test/miscellaneous | hendrycks_test | miscellaneous | | hendrycks_test | MultipleChoice | -| 263 | hendrycks_test/professional_medicine | hendrycks_test | professional_medicine | | hendrycks_test | MultipleChoice | -| 264 | hendrycks_test/medical_genetics | hendrycks_test | medical_genetics | | hendrycks_test | MultipleChoice | -| 265 | hendrycks_test/electrical_engineering | hendrycks_test | electrical_engineering | | hendrycks_test | MultipleChoice | -| 266 | hendrycks_test/management | hendrycks_test | management | | hendrycks_test | MultipleChoice | -| 267 | hendrycks_test/global_facts | hendrycks_test | global_facts | | hendrycks_test | MultipleChoice | -| 268 | hendrycks_test/formal_logic | hendrycks_test | formal_logic | | hendrycks_test | MultipleChoice | -| 269 | hendrycks_test/elementary_mathematics | hendrycks_test | elementary_mathematics | | hendrycks_test | MultipleChoice | -| 270 | hendrycks_test/econometrics | hendrycks_test | econometrics | | hendrycks_test | MultipleChoice | -| 271 | hendrycks_test/conceptual_physics | hendrycks_test | conceptual_physics | | hendrycks_test | MultipleChoice | -| 272 | hendrycks_test/computer_security | hendrycks_test | computer_security | | hendrycks_test | MultipleChoice | -| 273 | hendrycks_test/college_physics | hendrycks_test | college_physics | | hendrycks_test | MultipleChoice | -| 274 | hendrycks_test/college_medicine | hendrycks_test | college_medicine | | hendrycks_test | MultipleChoice | -| 275 | hendrycks_test/college_mathematics | hendrycks_test | college_mathematics | | hendrycks_test | MultipleChoice | -| 276 | hendrycks_test/college_computer_science | hendrycks_test | college_computer_science | | hendrycks_test | MultipleChoice | -| 277 | hendrycks_test/college_chemistry | hendrycks_test | college_chemistry | | hendrycks_test | MultipleChoice | -| 278 | hendrycks_test/college_biology | hendrycks_test | college_biology | | hendrycks_test | MultipleChoice | -| 279 | hendrycks_test/clinical_knowledge | hendrycks_test | clinical_knowledge | | hendrycks_test | MultipleChoice | -| 280 | hendrycks_test/business_ethics | hendrycks_test | business_ethics | | hendrycks_test | MultipleChoice | -| 281 | hendrycks_test/astronomy | hendrycks_test | astronomy | | hendrycks_test | MultipleChoice | -| 282 | hendrycks_test/anatomy | hendrycks_test | anatomy | | hendrycks_test | MultipleChoice | -| 283 | hendrycks_test/abstract_algebra | hendrycks_test | abstract_algebra | | hendrycks_test | MultipleChoice | -| 284 | hendrycks_test/high_school_biology | hendrycks_test | high_school_biology | | hendrycks_test | MultipleChoice | -| 285 | hendrycks_test/high_school_computer_science | hendrycks_test | high_school_computer_science | | hendrycks_test | MultipleChoice | -| 286 | hendrycks_test/high_school_chemistry | hendrycks_test | high_school_chemistry | | hendrycks_test | MultipleChoice | -| 287 | hendrycks_test/high_school_physics | hendrycks_test | high_school_physics | | hendrycks_test | MultipleChoice | -| 288 | hendrycks_test/machine_learning | hendrycks_test | machine_learning | | hendrycks_test | MultipleChoice | -| 289 | hendrycks_test/logical_fallacies | hendrycks_test | logical_fallacies | | hendrycks_test | MultipleChoice | -| 290 | hendrycks_test/jurisprudence | hendrycks_test | jurisprudence | | hendrycks_test | MultipleChoice | -| 291 | hendrycks_test/international_law | hendrycks_test | international_law | | hendrycks_test | MultipleChoice | -| 292 | hendrycks_test/human_sexuality | hendrycks_test | human_sexuality | | hendrycks_test | MultipleChoice | -| 293 | hendrycks_test/human_aging | hendrycks_test | human_aging | | hendrycks_test | MultipleChoice | -| 294 | hendrycks_test/high_school_world_history | hendrycks_test | high_school_world_history | | hendrycks_test | MultipleChoice | -| 295 | hendrycks_test/high_school_us_history | hendrycks_test | high_school_us_history | | hendrycks_test | MultipleChoice | -| 296 | hendrycks_test/high_school_statistics | hendrycks_test | high_school_statistics | | hendrycks_test | MultipleChoice | -| 297 | hendrycks_test/high_school_psychology | hendrycks_test | high_school_psychology | | hendrycks_test | MultipleChoice | -| 298 | hendrycks_test/marketing | hendrycks_test | marketing | | hendrycks_test | MultipleChoice | -| 299 | hendrycks_test/high_school_microeconomics | hendrycks_test | high_school_microeconomics | | hendrycks_test | MultipleChoice | -| 300 | hendrycks_test/high_school_mathematics | hendrycks_test | high_school_mathematics | | hendrycks_test | MultipleChoice | -| 301 | hendrycks_test/high_school_macroeconomics | hendrycks_test | high_school_macroeconomics | | hendrycks_test | MultipleChoice | -| 302 | hendrycks_test/high_school_government_and_politics | hendrycks_test | high_school_government_and_politics | | hendrycks_test | MultipleChoice | -| 303 | hendrycks_test/high_school_geography | hendrycks_test | high_school_geography | | hendrycks_test | MultipleChoice | -| 304 | hendrycks_test/high_school_european_history | hendrycks_test | high_school_european_history | | hendrycks_test | MultipleChoice | -| 305 | winogrande/winogrande_xl | winogrande | winogrande_xl | | winogrande | MultipleChoice | -| 306 | codah/codah | codah | codah | | codah | MultipleChoice | -| 307 | ai2_arc/ARC-Easy/challenge | ai2_arc | ARC-Easy | challenge | ai2_arc__challenge | MultipleChoice | -| 308 | ai2_arc/ARC-Challenge/challenge | ai2_arc | ARC-Challenge | challenge | ai2_arc__challenge | MultipleChoice | -| 309 | definite_pronoun_resolution | definite_pronoun_resolution | | | definite_pronoun_resolution | MultipleChoice | -| 310 | swag | swag | | | swag | MultipleChoice | -| 311 | math_qa | math_qa | | | math_qa | MultipleChoice | -| 312 | utilitarianism | metaeval/utilitarianism | | | utilitarianism | Classification | -| 313 | amazon_counterfactual/en | mteb/amazon_counterfactual | en | | amazon_counterfactual | Classification | -| 314 | insincere-questions | SetFit/insincere-questions | | | insincere_questions | Classification | -| 315 | toxic_conversations | SetFit/toxic_conversations | | | toxic_conversations | Classification | -| 316 | TuringBench | turingbench/TuringBench | | | turingbench | Classification | -| 317 | trec | trec | | | trec | Classification | -| 318 | vitaminc/tals--vitaminc | tals/vitaminc | tals--vitaminc | | tals_vitaminc | Classification | -| 319 | hope_edi/english | hope_edi | english | | hope_edi | Classification | -| 320 | rumoureval_2019/RumourEval2019 | strombergnlp/rumoureval_2019 | RumourEval2019 | | rumoureval_2019 | Classification | -| 321 | ethos/binary | ethos | binary | | ethos___binary | Classification | -| 322 | ethos/multilabel | ethos | multilabel | | ethos___multilabel | Classification | -| 323 | glue/cola | glue | cola | | glue___cola | Classification | -| 324 | glue/sst2 | glue | sst2 | | glue___sst2 | Classification | -| 325 | glue/mrpc | glue | mrpc | | glue___mrpc | Classification | -| 326 | glue/qqp | glue | qqp | | glue___qqp | Classification | -| 327 | glue/stsb | glue | stsb | | glue___stsb | Classification | -| 328 | glue/mnli | glue | mnli | | glue___mnli | Classification | -| 329 | glue/qnli | glue | qnli | | glue___qnli | Classification | -| 330 | glue/rte | glue | rte | | glue___rte | Classification | -| 331 | glue/wnli | glue | wnli | | glue___wnli | Classification | -| 332 | super_glue/boolq | super_glue | boolq | | super_glue___boolq | Classification | -| 333 | super_glue/cb | super_glue | cb | | super_glue___cb | Classification | -| 334 | super_glue/multirc | super_glue | multirc | | super_glue___multirc | Classification | -| 335 | super_glue/wic | super_glue | wic | | super_glue___wic | Classification | -| 336 | super_glue/axg | super_glue | axg | | super_glue___axg | Classification | -| 337 | tweet_eval/hate | tweet_eval | hate | | tweet_eval | Classification | -| 338 | tweet_eval/irony | tweet_eval | irony | | tweet_eval | Classification | -| 339 | tweet_eval/sentiment | tweet_eval | sentiment | | tweet_eval | Classification | -| 340 | tweet_eval/stance_abortion | tweet_eval | stance_abortion | | tweet_eval | Classification | -| 341 | tweet_eval/stance_atheism | tweet_eval | stance_atheism | | tweet_eval | Classification | -| 342 | tweet_eval/stance_climate | tweet_eval | stance_climate | | tweet_eval | Classification | -| 343 | tweet_eval/stance_feminist | tweet_eval | stance_feminist | | tweet_eval | Classification | -| 344 | tweet_eval/stance_hillary | tweet_eval | stance_hillary | | tweet_eval | Classification | -| 345 | tweet_eval/offensive | tweet_eval | offensive | | tweet_eval | Classification | -| 346 | tweet_eval/emoji | tweet_eval | emoji | | tweet_eval | Classification | -| 347 | tweet_eval/emotion | tweet_eval | emotion | | tweet_eval | Classification | -| 348 | discovery/discovery | discovery | discovery | | discovery | Classification | -| 349 | pragmeval/emobank-valence | pragmeval | emobank-valence | | pragmeval_1 | Classification | -| 350 | pragmeval/squinky-informativeness | pragmeval | squinky-informativeness | | pragmeval_1 | Classification | -| 351 | pragmeval/emobank-dominance | pragmeval | emobank-dominance | | pragmeval_1 | Classification | -| 352 | pragmeval/emobank-arousal | pragmeval | emobank-arousal | | pragmeval_1 | Classification | -| 353 | pragmeval/switchboard | pragmeval | switchboard | | pragmeval_1 | Classification | -| 354 | pragmeval/mrda | pragmeval | mrda | | pragmeval_1 | Classification | -| 355 | pragmeval/verifiability | pragmeval | verifiability | | pragmeval_1 | Classification | -| 356 | pragmeval/squinky-formality | pragmeval | squinky-formality | | pragmeval_1 | Classification | -| 357 | pragmeval/squinky-implicature | pragmeval | squinky-implicature | | pragmeval_1 | Classification | -| 358 | pragmeval/persuasiveness-eloquence | pragmeval | persuasiveness-eloquence | | pragmeval_2 | Classification | -| 359 | pragmeval/gum | pragmeval | gum | | pragmeval_2 | Classification | -| 360 | pragmeval/emergent | pragmeval | emergent | | pragmeval_2 | Classification | -| 361 | pragmeval/persuasiveness-premisetype | pragmeval | persuasiveness-premisetype | | pragmeval_2 | Classification | -| 362 | pragmeval/persuasiveness-relevance | pragmeval | persuasiveness-relevance | | pragmeval_2 | Classification | -| 363 | pragmeval/persuasiveness-specificity | pragmeval | persuasiveness-specificity | | pragmeval_2 | Classification | -| 364 | pragmeval/persuasiveness-strength | pragmeval | persuasiveness-strength | | pragmeval_2 | Classification | -| 365 | pragmeval/sarcasm | pragmeval | sarcasm | | pragmeval_2 | Classification | -| 366 | pragmeval/stac | pragmeval | stac | | pragmeval_2 | Classification | -| 367 | pragmeval/persuasiveness-claimtype | pragmeval | persuasiveness-claimtype | | pragmeval_2 | Classification | -| 368 | pragmeval/pdtb | pragmeval | pdtb | | pragmeval_2 | Classification | -| 369 | silicone/meld_s | silicone | meld_s | | silicone | Classification | -| 370 | silicone/sem | silicone | sem | | silicone | Classification | -| 371 | silicone/dyda_e | silicone | dyda_e | | silicone | Classification | -| 372 | silicone/dyda_da | silicone | dyda_da | | silicone | Classification | -| 373 | silicone/meld_e | silicone | meld_e | | silicone | Classification | -| 374 | silicone/maptask | silicone | maptask | | silicone | Classification | -| 375 | silicone/iemocap | silicone | iemocap | | silicone | Classification | -| 376 | silicone/oasis | silicone | oasis | | silicone | Classification | -| 377 | lex_glue/eurlex | lex_glue | eurlex | | lex_glue___eurlex | Classification | -| 378 | lex_glue/scotus | lex_glue | scotus | | lex_glue___scotus | Classification | -| 379 | lex_glue/ledgar | lex_glue | ledgar | | lex_glue___ledgar | Classification | -| 380 | lex_glue/unfair_tos | lex_glue | unfair_tos | | lex_glue___unfair_tos | Classification | -| 381 | lex_glue/case_hold | lex_glue | case_hold | | lex_glue___case_hold | MultipleChoice | -| 382 | language-identification | papluca/language-identification | | | language_identification | Classification | -| 383 | imdb | imdb | | | imdb | Classification | -| 384 | rotten_tomatoes | rotten_tomatoes | | | rotten_tomatoes | Classification | -| 385 | ag_news | ag_news | | | ag_news | Classification | -| 386 | yelp_review_full/yelp_review_full | yelp_review_full | yelp_review_full | | yelp_review_full | Classification | -| 387 | financial_phrasebank/sentences_allagree | financial_phrasebank | sentences_allagree | | financial_phrasebank | Classification | -| 388 | poem_sentiment | poem_sentiment | | | poem_sentiment | Classification | -| 389 | dbpedia_14/dbpedia_14 | dbpedia_14 | dbpedia_14 | | dbpedia_14 | Classification | -| 390 | amazon_polarity/amazon_polarity | amazon_polarity | amazon_polarity | | amazon_polarity | Classification | -| 391 | app_reviews | app_reviews | | | app_reviews | Classification | -| 392 | hate_speech18 | hate_speech18 | | | hate_speech18 | Classification | -| 393 | sms_spam | sms_spam | | | sms_spam | Classification | -| 394 | humicroedit/subtask-1 | humicroedit | subtask-1 | | humicroedit___subtask_1 | Classification | -| 395 | humicroedit/subtask-2 | humicroedit | subtask-2 | | humicroedit___subtask_2 | Classification | -| 396 | snips_built_in_intents | snips_built_in_intents | | | snips_built_in_intents | Classification | -| 397 | banking77 | banking77 | | | banking77 | Classification | -| 398 | hate_speech_offensive | hate_speech_offensive | | | hate_speech_offensive | Classification | -| 399 | yahoo_answers_topics/yahoo_answers_topics | yahoo_answers_topics | yahoo_answers_topics | | yahoo_answers_topics | Classification | -| 400 | stackoverflow-questions | pacovaldez/stackoverflow-questions | | | stackoverflow_questions | Classification | -| 401 | hyperpartisan_news_detection/byarticle | hyperpartisan_news_detection | byarticle | | hyperpartisan_news_detection___byarticle | Classification | -| 402 | hyperpartisan_news_detection/bypublisher | hyperpartisan_news_detection | bypublisher | | hyperpartisan_news_detection___bypublisher | Classification | -| 403 | go_emotions/simplified | go_emotions | simplified | | go_emotions___simplified | Classification | -| 404 | scicite | scicite | | | scicite | Classification | -| 405 | liar | liar | | | liar | Classification | -| 406 | lexical_relation_classification/K&H+N | relbert/lexical_relation_classification | K&H+N | | relbert_lexical_relation_classification | Classification | -| 407 | lexical_relation_classification/BLESS | relbert/lexical_relation_classification | BLESS | | relbert_lexical_relation_classification | Classification | -| 408 | lexical_relation_classification/CogALexV | relbert/lexical_relation_classification | CogALexV | | relbert_lexical_relation_classification | Classification | -| 409 | lexical_relation_classification/EVALution | relbert/lexical_relation_classification | EVALution | | relbert_lexical_relation_classification | Classification | -| 410 | lexical_relation_classification/ROOT09 | relbert/lexical_relation_classification | ROOT09 | | relbert_lexical_relation_classification | Classification | -| 411 | linguisticprobing/top_constituents | metaeval/linguisticprobing | top_constituents | | metaeval_linguisticprobing | Classification | -| 412 | linguisticprobing/tree_depth | metaeval/linguisticprobing | tree_depth | | metaeval_linguisticprobing | Classification | -| 413 | linguisticprobing/coordination_inversion | metaeval/linguisticprobing | coordination_inversion | | metaeval_linguisticprobing | Classification | -| 414 | linguisticprobing/odd_man_out | metaeval/linguisticprobing | odd_man_out | | metaeval_linguisticprobing | Classification | -| 415 | linguisticprobing/sentence_length | metaeval/linguisticprobing | sentence_length | | metaeval_linguisticprobing | Classification | -| 416 | linguisticprobing/subj_number | metaeval/linguisticprobing | subj_number | | metaeval_linguisticprobing | Classification | -| 417 | linguisticprobing/word_content | metaeval/linguisticprobing | word_content | | metaeval_linguisticprobing | Classification | -| 418 | linguisticprobing/obj_number | metaeval/linguisticprobing | obj_number | | metaeval_linguisticprobing | Classification | -| 419 | linguisticprobing/past_present | metaeval/linguisticprobing | past_present | | metaeval_linguisticprobing | Classification | -| 420 | linguisticprobing/bigram_shift | metaeval/linguisticprobing | bigram_shift | | metaeval_linguisticprobing | Classification | -| 421 | crowdflower/tweet_global_warming | metaeval/crowdflower | tweet_global_warming | | metaeval_crowdflower | Classification | -| 422 | crowdflower/airline-sentiment | metaeval/crowdflower | airline-sentiment | | metaeval_crowdflower | Classification | -| 423 | crowdflower/sentiment_nuclear_power | metaeval/crowdflower | sentiment_nuclear_power | | metaeval_crowdflower | Classification | -| 424 | crowdflower/economic-news | metaeval/crowdflower | economic-news | | metaeval_crowdflower | Classification | -| 425 | crowdflower/corporate-messaging | metaeval/crowdflower | corporate-messaging | | metaeval_crowdflower | Classification | -| 426 | crowdflower/political-media-message | metaeval/crowdflower | political-media-message | | metaeval_crowdflower | Classification | -| 427 | crowdflower/text_emotion | metaeval/crowdflower | text_emotion | | metaeval_crowdflower | Classification | -| 428 | crowdflower/political-media-bias | metaeval/crowdflower | political-media-bias | | metaeval_crowdflower | Classification | -| 429 | crowdflower/political-media-audience | metaeval/crowdflower | political-media-audience | | metaeval_crowdflower | Classification | -| 430 | ethics/commonsense | metaeval/ethics | commonsense | | metaeval_ethics___commonsense | Classification | -| 431 | ethics/deontology | metaeval/ethics | deontology | | metaeval_ethics___deontology | Classification | -| 432 | ethics/justice | metaeval/ethics | justice | | metaeval_ethics___justice | Classification | -| 433 | ethics/virtue | metaeval/ethics | virtue | | metaeval_ethics___virtue | Classification | -| 434 | emo/emo2019 | emo | emo2019 | | emo | Classification | -| 435 | google_wellformed_query | google_wellformed_query | | | google_wellformed_query | Classification | -| 436 | tweets_hate_speech_detection | tweets_hate_speech_detection | | | tweets_hate_speech_detection | Classification | -| 437 | adv_glue/adv_sst2 | adv_glue | adv_sst2 | | adv_glue___adv_sst2 | Classification | -| 438 | adv_glue/adv_qqp | adv_glue | adv_qqp | | adv_glue___adv_qqp | Classification | -| 439 | adv_glue/adv_mnli | adv_glue | adv_mnli | | adv_glue___adv_mnli | Classification | -| 440 | adv_glue/adv_mnli_mismatched | adv_glue | adv_mnli_mismatched | | adv_glue___adv_mnli_mismatched | Classification | -| 441 | adv_glue/adv_qnli | adv_glue | adv_qnli | | adv_glue___adv_qnli | Classification | -| 442 | adv_glue/adv_rte | adv_glue | adv_rte | | adv_glue___adv_rte | Classification | -| 443 | has_part | has_part | | | has_part | Classification | -| 444 | wnut_17/wnut_17 | wnut_17 | wnut_17 | | wnut_17 | TokenClassification | -| 445 | ncbi_disease/ncbi_disease | ncbi_disease | ncbi_disease | | ncbi_disease | TokenClassification | -| 446 | acronym_identification | acronym_identification | | | acronym_identification | TokenClassification | -| 447 | jnlpba/jnlpba | jnlpba | jnlpba | | jnlpba | TokenClassification | -| 448 | species_800/species_800 | species_800 | species_800 | | species_800 | TokenClassification | -| 449 | ontonotes_english/SpeedOfMagic--ontonotes_english | SpeedOfMagic/ontonotes_english | SpeedOfMagic--ontonotes_english | | SpeedOfMagic_ontonotes_english | TokenClassification | -| 450 | blog_authorship_corpus/gender | blog_authorship_corpus | | gender | blog_authorship_corpus__gender | Classification | -| 451 | blog_authorship_corpus/age | blog_authorship_corpus | | age | blog_authorship_corpus__age | Classification | -| 452 | blog_authorship_corpus/horoscope | blog_authorship_corpus | | horoscope | blog_authorship_corpus__horoscope | Classification | -| 453 | blog_authorship_corpus/job | blog_authorship_corpus | | job | blog_authorship_corpus__job | Classification | -| 454 | open_question_type | launch/open_question_type | | | launch_open_question_type | Classification | -| 455 | health_fact | health_fact | | | health_fact | Classification | -| 456 | commonsense_qa | commonsense_qa | | | commonsense_qa | MultipleChoice | -| 457 | mc_taco | mc_taco | | | mc_taco | Classification | -| 458 | ade_corpus_v2/Ade_corpus_v2_classification | ade_corpus_v2 | Ade_corpus_v2_classification | | ade_corpus_v2___Ade_corpus_v2_classification | Classification | -| 459 | discosense | prajjwal1/discosense | | | discosense | MultipleChoice | -| 460 | circa | circa | | | circa | Classification | -| 461 | code_x_glue_cc_defect_detection | code_x_glue_cc_defect_detection | | | code_x_glue_cc_defect_detection | Classification | -| 462 | code_x_glue_cc_clone_detection_big_clone_bench | code_x_glue_cc_clone_detection_big_clone_bench | | | code_x_glue_cc_clone_detection_big_clone_bench | Classification | -| 463 | code_x_glue_cc_code_refinement/medium | code_x_glue_cc_code_refinement | medium | | code_x_glue_cc_code_refinement | MultipleChoice | -| 464 | EffectiveFeedbackStudentWriting | YaHi/EffectiveFeedbackStudentWriting | | | effective_feedback_student_writing | Classification | -| 465 | promptSentiment | Ericwang/promptSentiment | | | promptSentiment | Classification | -| 466 | promptNLI | Ericwang/promptNLI | | | promptNLI | Classification | -| 467 | promptSpoke | Ericwang/promptSpoke | | | promptSpoke | Classification | -| 468 | promptProficiency | Ericwang/promptProficiency | | | promptProficiency | Classification | -| 469 | promptGrammar | Ericwang/promptGrammar | | | promptGrammar | Classification | -| 470 | promptCoherence | Ericwang/promptCoherence | | | promptCoherence | Classification | -| 471 | phrase_similarity | PiC/phrase_similarity | | | phrase_similarity | Classification | -| 472 | scientific-exaggeration-detection | copenlu/scientific-exaggeration-detection | | | exaggeration_detection | Classification | -| 473 | quarel | quarel | | | quarel | Classification | -| 474 | fever-evidence-related/mwong--fever-related | mwong/fever-evidence-related | mwong--fever-related | | mwong_fever_evidence_related | Classification | -| 475 | numer_sense | numer_sense | | | numer_sense | Classification | -| 476 | dynasent/dynabench.dynasent.r1.all/r1 | dynabench/dynasent | dynabench.dynasent.r1.all | r1 | dynasent__r1 | Classification | -| 477 | dynasent/dynabench.dynasent.r2.all/r2 | dynabench/dynasent | dynabench.dynasent.r2.all | r2 | dynasent__r2 | Classification | -| 478 | Sarcasm_News_Headline | raquiba/Sarcasm_News_Headline | | | sarcasm_news | Classification | -| 479 | sem_eval_2010_task_8 | sem_eval_2010_task_8 | | | sem_eval_2010_task_8 | Classification | -| 480 | auditor_review/demo-org--auditor_review | demo-org/auditor_review | demo-org--auditor_review | | demo_org_auditor_review | Classification | +| | id | dataset_name | config_name | task_name | preprocessing_name | task_type | +|----:|:---------------------------------------------------------------------|:------------------------------------------|:----------------------------------------------------|:---------------|:---------------------------------------------|:--------------------| +| 0 | anli/a1 | anli | | a1 | anli__a1 | Classification | +| 1 | anli/a2 | anli | | a2 | anli__a2 | Classification | +| 2 | anli/a3 | anli | | a3 | anli__a3 | Classification | +| 3 | babi_nli/basic-deduction | metaeval/babi_nli | basic-deduction | | babi_nli | Classification | +| 4 | babi_nli/basic-induction | metaeval/babi_nli | basic-induction | | babi_nli | Classification | +| 5 | babi_nli/time-reasoning | metaeval/babi_nli | time-reasoning | | babi_nli | Classification | +| 6 | babi_nli/simple-negation | metaeval/babi_nli | simple-negation | | babi_nli | Classification | +| 7 | babi_nli/three-supporting-facts | metaeval/babi_nli | three-supporting-facts | | babi_nli | Classification | +| 8 | babi_nli/path-finding | metaeval/babi_nli | path-finding | | babi_nli | Classification | +| 9 | babi_nli/single-supporting-fact | metaeval/babi_nli | single-supporting-fact | | babi_nli | Classification | +| 10 | babi_nli/yes-no-questions | metaeval/babi_nli | yes-no-questions | | babi_nli | Classification | +| 11 | babi_nli/lists-sets | metaeval/babi_nli | lists-sets | | babi_nli | Classification | +| 12 | babi_nli/two-arg-relations | metaeval/babi_nli | two-arg-relations | | babi_nli | Classification | +| 13 | babi_nli/conjunction | metaeval/babi_nli | conjunction | | babi_nli | Classification | +| 14 | babi_nli/compound-coreference | metaeval/babi_nli | compound-coreference | | babi_nli | Classification | +| 15 | babi_nli/basic-coreference | metaeval/babi_nli | basic-coreference | | babi_nli | Classification | +| 16 | babi_nli/size-reasoning | metaeval/babi_nli | size-reasoning | | babi_nli | Classification | +| 17 | babi_nli/positional-reasoning | metaeval/babi_nli | positional-reasoning | | babi_nli | Classification | +| 18 | babi_nli/two-supporting-facts | metaeval/babi_nli | two-supporting-facts | | babi_nli | Classification | +| 19 | babi_nli/three-arg-relations | metaeval/babi_nli | three-arg-relations | | babi_nli | Classification | +| 20 | babi_nli/indefinite-knowledge | metaeval/babi_nli | indefinite-knowledge | | babi_nli | Classification | +| 21 | babi_nli/counting | metaeval/babi_nli | counting | | babi_nli | Classification | +| 22 | lingnli | metaeval/lingnli | | | ling_nli | Classification | +| 23 | sick/label | sick | | label | sick__label | Classification | +| 24 | sick/relatedness | sick | | relatedness | sick__relatedness | Classification | +| 25 | sick/entailment_AB | sick | | entailment_AB | sick__entailment_AB | Classification | +| 26 | sick/entailment_BA | sick | | entailment_BA | sick__entailment_BA | Classification | +| 27 | snli | snli | | | snli | Classification | +| 28 | scitail/snli_format | scitail | snli_format | | scitail | Classification | +| 29 | hans | hans | | | hans | Classification | +| 30 | WANLI | alisawuffles/WANLI | | | wanli | Classification | +| 31 | recast/recast_megaveridicality | metaeval/recast | recast_megaveridicality | | recast | Classification | +| 32 | recast/recast_sentiment | metaeval/recast | recast_sentiment | | recast | Classification | +| 33 | recast/recast_ner | metaeval/recast | recast_ner | | recast | Classification | +| 34 | recast/recast_verbcorner | metaeval/recast | recast_verbcorner | | recast | Classification | +| 35 | recast/recast_verbnet | metaeval/recast | recast_verbnet | | recast | Classification | +| 36 | recast/recast_factuality | metaeval/recast | recast_factuality | | recast | Classification | +| 37 | recast/recast_puns | metaeval/recast | recast_puns | | recast | Classification | +| 38 | recast/recast_kg_relations | metaeval/recast | recast_kg_relations | | recast | Classification | +| 39 | probability_words_nli/reasoning_1hop | sileod/probability_words_nli | reasoning_1hop | | probability_words_nli | Classification | +| 40 | probability_words_nli/reasoning_2hop | sileod/probability_words_nli | reasoning_2hop | | probability_words_nli | Classification | +| 41 | probability_words_nli/usnli | sileod/probability_words_nli | usnli | | probability_words_nli | Classification | +| 42 | nan-nli/joey234--nan-nli | joey234/nan-nli | joey234--nan-nli | | nan_nli | Classification | +| 43 | nli_fever | pietrolesci/nli_fever | | | nli_fever | Classification | +| 44 | breaking_nli | pietrolesci/breaking_nli | | | breaking_nli | Classification | +| 45 | conj_nli | pietrolesci/conj_nli | | | conj_nli | Classification | +| 46 | fracas | pietrolesci/fracas | | | fracas | Classification | +| 47 | dialogue_nli | pietrolesci/dialogue_nli | | | dialogue_nli | Classification | +| 48 | mpe | pietrolesci/mpe | | | mpe_nli | Classification | +| 49 | dnc | pietrolesci/dnc | | | dnc_nli | Classification | +| 50 | gpt3_nli | pietrolesci/gpt3_nli | | | gpt3_nli | Classification | +| 51 | recast_white/fnplus | pietrolesci/recast_white | | fnplus | recast_white__fnplus | Classification | +| 52 | recast_white/sprl | pietrolesci/recast_white | | sprl | recast_white__sprl | Classification | +| 53 | recast_white/dpr | pietrolesci/recast_white | | dpr | recast_white__dpr | Classification | +| 54 | joci | pietrolesci/joci | | | joci | Classification | +| 55 | contrast_nli | martn-nguyen/contrast_nli | | | contrast_nli | Classification | +| 56 | robust_nli/IS_CS | pietrolesci/robust_nli | | IS_CS | robust_nli__IS_CS | Classification | +| 57 | robust_nli/LI_LI | pietrolesci/robust_nli | | LI_LI | robust_nli__LI_LI | Classification | +| 58 | robust_nli/ST_WO | pietrolesci/robust_nli | | ST_WO | robust_nli__ST_WO | Classification | +| 59 | robust_nli/PI_SP | pietrolesci/robust_nli | | PI_SP | robust_nli__PI_SP | Classification | +| 60 | robust_nli/PI_CD | pietrolesci/robust_nli | | PI_CD | robust_nli__PI_CD | Classification | +| 61 | robust_nli/ST_SE | pietrolesci/robust_nli | | ST_SE | robust_nli__ST_SE | Classification | +| 62 | robust_nli/ST_NE | pietrolesci/robust_nli | | ST_NE | robust_nli__ST_NE | Classification | +| 63 | robust_nli/ST_LM | pietrolesci/robust_nli | | ST_LM | robust_nli__ST_LM | Classification | +| 64 | robust_nli_is_sd | pietrolesci/robust_nli_is_sd | | | robust_nli_is_sd | Classification | +| 65 | robust_nli_li_ts | pietrolesci/robust_nli_li_ts | | | robust_nli_li_ts | Classification | +| 66 | gen_debiased_nli/snli_seq_z | pietrolesci/gen_debiased_nli | | snli_seq_z | gen_debiased_nli__snli_seq_z | Classification | +| 67 | gen_debiased_nli/snli_z_aug | pietrolesci/gen_debiased_nli | | snli_z_aug | gen_debiased_nli__snli_z_aug | Classification | +| 68 | gen_debiased_nli/snli_par_z | pietrolesci/gen_debiased_nli | | snli_par_z | gen_debiased_nli__snli_par_z | Classification | +| 69 | gen_debiased_nli/mnli_par_z | pietrolesci/gen_debiased_nli | | mnli_par_z | gen_debiased_nli__mnli_par_z | Classification | +| 70 | gen_debiased_nli/mnli_z_aug | pietrolesci/gen_debiased_nli | | mnli_z_aug | gen_debiased_nli__mnli_z_aug | Classification | +| 71 | gen_debiased_nli/mnli_seq_z | pietrolesci/gen_debiased_nli | | mnli_seq_z | gen_debiased_nli__mnli_seq_z | Classification | +| 72 | add_one_rte | pietrolesci/add_one_rte | | | add_one_rte | Classification | +| 73 | imppres/presupposition_both_presupposition/presupposition | metaeval/imppres | presupposition_both_presupposition | presupposition | imppres__presupposition | Classification | +| 74 | imppres/presupposition_change_of_state/presupposition | metaeval/imppres | presupposition_change_of_state | presupposition | imppres__presupposition | Classification | +| 75 | imppres/presupposition_cleft_existence/presupposition | metaeval/imppres | presupposition_cleft_existence | presupposition | imppres__presupposition | Classification | +| 76 | imppres/presupposition_cleft_uniqueness/presupposition | metaeval/imppres | presupposition_cleft_uniqueness | presupposition | imppres__presupposition | Classification | +| 77 | imppres/presupposition_possessed_definites_existence/presupposition | metaeval/imppres | presupposition_possessed_definites_existence | presupposition | imppres__presupposition | Classification | +| 78 | imppres/presupposition_only_presupposition/presupposition | metaeval/imppres | presupposition_only_presupposition | presupposition | imppres__presupposition | Classification | +| 79 | imppres/presupposition_possessed_definites_uniqueness/presupposition | metaeval/imppres | presupposition_possessed_definites_uniqueness | presupposition | imppres__presupposition | Classification | +| 80 | imppres/presupposition_question_presupposition/presupposition | metaeval/imppres | presupposition_question_presupposition | presupposition | imppres__presupposition | Classification | +| 81 | imppres/presupposition_all_n_presupposition/presupposition | metaeval/imppres | presupposition_all_n_presupposition | presupposition | imppres__presupposition | Classification | +| 82 | imppres/implicature_connectives/prag | metaeval/imppres | implicature_connectives | prag | imppres__prag | Classification | +| 83 | imppres/implicature_numerals_10_100/prag | metaeval/imppres | implicature_numerals_10_100 | prag | imppres__prag | Classification | +| 84 | imppres/implicature_numerals_2_3/prag | metaeval/imppres | implicature_numerals_2_3 | prag | imppres__prag | Classification | +| 85 | imppres/implicature_modals/prag | metaeval/imppres | implicature_modals | prag | imppres__prag | Classification | +| 86 | imppres/implicature_gradable_verb/prag | metaeval/imppres | implicature_gradable_verb | prag | imppres__prag | Classification | +| 87 | imppres/implicature_gradable_adjective/prag | metaeval/imppres | implicature_gradable_adjective | prag | imppres__prag | Classification | +| 88 | imppres/implicature_quantifiers/prag | metaeval/imppres | implicature_quantifiers | prag | imppres__prag | Classification | +| 89 | imppres/implicature_numerals_10_100/log | metaeval/imppres | implicature_numerals_10_100 | log | imppres__log | Classification | +| 90 | imppres/implicature_gradable_adjective/log | metaeval/imppres | implicature_gradable_adjective | log | imppres__log | Classification | +| 91 | imppres/implicature_gradable_verb/log | metaeval/imppres | implicature_gradable_verb | log | imppres__log | Classification | +| 92 | imppres/implicature_modals/log | metaeval/imppres | implicature_modals | log | imppres__log | Classification | +| 93 | imppres/implicature_connectives/log | metaeval/imppres | implicature_connectives | log | imppres__log | Classification | +| 94 | imppres/implicature_quantifiers/log | metaeval/imppres | implicature_quantifiers | log | imppres__log | Classification | +| 95 | imppres/implicature_numerals_2_3/log | metaeval/imppres | implicature_numerals_2_3 | log | imppres__log | Classification | +| 96 | glue_diagnostics/diagnostics | pietrolesci/glue_diagnostics | | diagnostics | glue__diagnostics | Classification | +| 97 | hlgd | hlgd | | | hlgd | Classification | +| 98 | paws/labeled_final | paws | labeled_final | | paws___labeled_final | Classification | +| 99 | paws/labeled_swap | paws | labeled_swap | | paws___labeled_swap | Classification | +| 100 | quora | quora | | | quora | Classification | +| 101 | medical_questions_pairs | medical_questions_pairs | | | medical_questions_pairs | Classification | +| 102 | conll2003/pos_tags | conll2003 | | pos_tags | conll2003__pos_tags | TokenClassification | +| 103 | conll2003/chunk_tags | conll2003 | | chunk_tags | conll2003__chunk_tags | TokenClassification | +| 104 | conll2003/ner_tags | conll2003 | | ner_tags | conll2003__ner_tags | TokenClassification | +| 105 | hh-rlhf | Anthropic/hh-rlhf | | | anthropic_rlhf | MultipleChoice | +| 106 | model-written-evals | Anthropic/model-written-evals | | | model_written_evals | MultipleChoice | +| 107 | truthful_qa/multiple_choice | truthful_qa | multiple_choice | | truthful_qa___multiple_choice | MultipleChoice | +| 108 | fig-qa | nightingal3/fig-qa | | | fig_qa | MultipleChoice | +| 109 | bigbench/crass_ai | bigbench | crass_ai | | bigbench | MultipleChoice | +| 110 | bigbench/elementary_math_qa | bigbench | elementary_math_qa | | bigbench | MultipleChoice | +| 111 | bigbench/logical_args | bigbench | logical_args | | bigbench | MultipleChoice | +| 112 | bigbench/tracking_shuffled_objects | bigbench | tracking_shuffled_objects | | bigbench | MultipleChoice | +| 113 | bigbench/novel_concepts | bigbench | novel_concepts | | bigbench | MultipleChoice | +| 114 | bigbench/anachronisms | bigbench | anachronisms | | bigbench | MultipleChoice | +| 115 | bigbench/analogical_similarity | bigbench | analogical_similarity | | bigbench | MultipleChoice | +| 116 | bigbench/cs_algorithms | bigbench | cs_algorithms | | bigbench | MultipleChoice | +| 117 | bigbench/timedial | bigbench | timedial | | bigbench | MultipleChoice | +| 118 | bigbench/metaphor_boolean | bigbench | metaphor_boolean | | bigbench | MultipleChoice | +| 119 | bigbench/logical_fallacy_detection | bigbench | logical_fallacy_detection | | bigbench | MultipleChoice | +| 120 | bigbench/mnist_ascii | bigbench | mnist_ascii | | bigbench | MultipleChoice | +| 121 | bigbench/disambiguation_qa | bigbench | disambiguation_qa | | bigbench | MultipleChoice | +| 122 | bigbench/hhh_alignment | bigbench | hhh_alignment | | bigbench | MultipleChoice | +| 123 | bigbench/goal_step_wikihow | bigbench | goal_step_wikihow | | bigbench | MultipleChoice | +| 124 | bigbench/physical_intuition | bigbench | physical_intuition | | bigbench | MultipleChoice | +| 125 | bigbench/suicide_risk | bigbench | suicide_risk | | bigbench | MultipleChoice | +| 126 | bigbench/causal_judgment | bigbench | causal_judgment | | bigbench | MultipleChoice | +| 127 | bigbench/movie_dialog_same_or_different | bigbench | movie_dialog_same_or_different | | bigbench | MultipleChoice | +| 128 | bigbench/epistemic_reasoning | bigbench | epistemic_reasoning | | bigbench | MultipleChoice | +| 129 | bigbench/vitaminc_fact_verification | bigbench | vitaminc_fact_verification | | bigbench | MultipleChoice | +| 130 | bigbench/navigate | bigbench | navigate | | bigbench | MultipleChoice | +| 131 | bigbench/key_value_maps | bigbench | key_value_maps | | bigbench | MultipleChoice | +| 132 | bigbench/winowhy | bigbench | winowhy | | bigbench | MultipleChoice | +| 133 | bigbench/cause_and_effect | bigbench | cause_and_effect | | bigbench | MultipleChoice | +| 134 | bigbench/geometric_shapes | bigbench | geometric_shapes | | bigbench | MultipleChoice | +| 135 | bigbench/simple_ethical_questions | bigbench | simple_ethical_questions | | bigbench | MultipleChoice | +| 136 | bigbench/snarks | bigbench | snarks | | bigbench | MultipleChoice | +| 137 | bigbench/gre_reading_comprehension | bigbench | gre_reading_comprehension | | bigbench | MultipleChoice | +| 138 | bigbench/strategyqa | bigbench | strategyqa | | bigbench | MultipleChoice | +| 139 | bigbench/metaphor_understanding | bigbench | metaphor_understanding | | bigbench | MultipleChoice | +| 140 | bigbench/understanding_fables | bigbench | understanding_fables | | bigbench | MultipleChoice | +| 141 | bigbench/temporal_sequences | bigbench | temporal_sequences | | bigbench | MultipleChoice | +| 142 | bigbench/arithmetic | bigbench | arithmetic | | bigbench | MultipleChoice | +| 143 | bigbench/checkmate_in_one | bigbench | checkmate_in_one | | bigbench | MultipleChoice | +| 144 | bigbench/crash_blossom | bigbench | crash_blossom | | bigbench | MultipleChoice | +| 145 | bigbench/fact_checker | bigbench | fact_checker | | bigbench | MultipleChoice | +| 146 | bigbench/logic_grid_puzzle | bigbench | logic_grid_puzzle | | bigbench | MultipleChoice | +| 147 | bigbench/emoji_movie | bigbench | emoji_movie | | bigbench | MultipleChoice | +| 148 | bigbench/discourse_marker_prediction | bigbench | discourse_marker_prediction | | bigbench | MultipleChoice | +| 149 | bigbench/salient_translation_error_detection | bigbench | salient_translation_error_detection | | bigbench | MultipleChoice | +| 150 | bigbench/implicit_relations | bigbench | implicit_relations | | bigbench | MultipleChoice | +| 151 | bigbench/identify_odd_metaphor | bigbench | identify_odd_metaphor | | bigbench | MultipleChoice | +| 152 | bigbench/entailed_polarity | bigbench | entailed_polarity | | bigbench | MultipleChoice | +| 153 | bigbench/similarities_abstraction | bigbench | similarities_abstraction | | bigbench | MultipleChoice | +| 154 | bigbench/code_line_description | bigbench | code_line_description | | bigbench | MultipleChoice | +| 155 | bigbench/sports_understanding | bigbench | sports_understanding | | bigbench | MultipleChoice | +| 156 | bigbench/symbol_interpretation | bigbench | symbol_interpretation | | bigbench | MultipleChoice | +| 157 | bigbench/misconceptions | bigbench | misconceptions | | bigbench | MultipleChoice | +| 158 | bigbench/play_dialog_same_or_different | bigbench | play_dialog_same_or_different | | bigbench | MultipleChoice | +| 159 | bigbench/unit_interpretation | bigbench | unit_interpretation | | bigbench | MultipleChoice | +| 160 | bigbench/logical_sequence | bigbench | logical_sequence | | bigbench | MultipleChoice | +| 161 | bigbench/real_or_fake_text | bigbench | real_or_fake_text | | bigbench | MultipleChoice | +| 162 | bigbench/penguins_in_a_table | bigbench | penguins_in_a_table | | bigbench | MultipleChoice | +| 163 | bigbench/english_proverbs | bigbench | english_proverbs | | bigbench | MultipleChoice | +| 164 | bigbench/intent_recognition | bigbench | intent_recognition | | bigbench | MultipleChoice | +| 165 | bigbench/strange_stories | bigbench | strange_stories | | bigbench | MultipleChoice | +| 166 | bigbench/hyperbaton | bigbench | hyperbaton | | bigbench | MultipleChoice | +| 167 | bigbench/authorship_verification | bigbench | authorship_verification | | bigbench | MultipleChoice | +| 168 | bigbench/date_understanding | bigbench | date_understanding | | bigbench | MultipleChoice | +| 169 | bigbench/empirical_judgments | bigbench | empirical_judgments | | bigbench | MultipleChoice | +| 170 | bigbench/question_selection | bigbench | question_selection | | bigbench | MultipleChoice | +| 171 | bigbench/undo_permutation | bigbench | undo_permutation | | bigbench | MultipleChoice | +| 172 | bigbench/human_organs_senses | bigbench | human_organs_senses | | bigbench | MultipleChoice | +| 173 | bigbench/logical_deduction | bigbench | logical_deduction | | bigbench | MultipleChoice | +| 174 | bigbench/contextual_parametric_knowledge_conflicts | bigbench | contextual_parametric_knowledge_conflicts | | bigbench | MultipleChoice | +| 175 | bigbench/abstract_narrative_understanding | bigbench | abstract_narrative_understanding | | bigbench | MultipleChoice | +| 176 | bigbench/identify_math_theorems | bigbench | identify_math_theorems | | bigbench | MultipleChoice | +| 177 | bigbench/odd_one_out | bigbench | odd_one_out | | bigbench | MultipleChoice | +| 178 | bigbench/dyck_languages | bigbench | dyck_languages | | bigbench | MultipleChoice | +| 179 | bigbench/analytic_entailment | bigbench | analytic_entailment | | bigbench | MultipleChoice | +| 180 | bigbench/sentence_ambiguity | bigbench | sentence_ambiguity | | bigbench | MultipleChoice | +| 181 | bigbench/reasoning_about_colored_objects | bigbench | reasoning_about_colored_objects | | bigbench | MultipleChoice | +| 182 | bigbench/fantasy_reasoning | bigbench | fantasy_reasoning | | bigbench | MultipleChoice | +| 183 | bigbench/presuppositions_as_nli | bigbench | presuppositions_as_nli | | bigbench | MultipleChoice | +| 184 | bigbench/irony_identification | bigbench | irony_identification | | bigbench | MultipleChoice | +| 185 | bigbench/figure_of_speech_detection | bigbench | figure_of_speech_detection | | bigbench | MultipleChoice | +| 186 | bigbench/evaluating_information_essentiality | bigbench | evaluating_information_essentiality | | bigbench | MultipleChoice | +| 187 | bigbench/physics | bigbench | physics | | bigbench | MultipleChoice | +| 188 | bigbench/conceptual_combinations | bigbench | conceptual_combinations | | bigbench | MultipleChoice | +| 189 | bigbench/riddle_sense | bigbench | riddle_sense | | bigbench | MultipleChoice | +| 190 | bigbench/social_support | bigbench | social_support | | bigbench | MultipleChoice | +| 191 | bigbench/phrase_relatedness | bigbench | phrase_relatedness | | bigbench | MultipleChoice | +| 192 | bigbench/moral_permissibility | bigbench | moral_permissibility | | bigbench | MultipleChoice | +| 193 | bigbench/international_phonetic_alphabet_nli | bigbench | international_phonetic_alphabet_nli | | bigbench | MultipleChoice | +| 194 | bigbench/color | bigbench | color | | bigbench | MultipleChoice | +| 195 | bigbench/emojis_emotion_prediction | bigbench | emojis_emotion_prediction | | bigbench | MultipleChoice | +| 196 | bigbench/general_knowledge | bigbench | general_knowledge | | bigbench | MultipleChoice | +| 197 | bigbench/cifar10_classification | bigbench | cifar10_classification | | bigbench | MultipleChoice | +| 198 | bigbench/ruin_names | bigbench | ruin_names | | bigbench | MultipleChoice | +| 199 | bigbench/nonsense_words_grammar | bigbench | nonsense_words_grammar | | bigbench | MultipleChoice | +| 200 | bigbench/intersect_geometry | bigbench | intersect_geometry | | bigbench | MultipleChoice | +| 201 | bigbench/formal_fallacies_syllogisms_negation | bigbench | formal_fallacies_syllogisms_negation | | bigbench | MultipleChoice | +| 202 | bigbench/social_iqa | bigbench | social_iqa | | bigbench | MultipleChoice | +| 203 | bigbench/bbq_lite_json | bigbench | bbq_lite_json | | bigbench | MultipleChoice | +| 204 | bigbench/implicatures | bigbench | implicatures | | bigbench | MultipleChoice | +| 205 | bigbench/movie_recommendation | bigbench | movie_recommendation | | bigbench | MultipleChoice | +| 206 | bigbench/dark_humor_detection | bigbench | dark_humor_detection | | bigbench | MultipleChoice | +| 207 | bigbench/hindu_knowledge | bigbench | hindu_knowledge | | bigbench | MultipleChoice | +| 208 | bigbench/mathematical_induction | bigbench | mathematical_induction | | bigbench | MultipleChoice | +| 209 | bigbench/known_unknowns | bigbench | known_unknowns | | bigbench | MultipleChoice | +| 210 | blimp/drop_argument | blimp | drop_argument | | blimp_hard | MultipleChoice | +| 211 | blimp/animate_subject_passive | blimp | animate_subject_passive | | blimp_hard | MultipleChoice | +| 212 | blimp/sentential_negation_npi_scope | blimp | sentential_negation_npi_scope | | blimp_hard | MultipleChoice | +| 213 | blimp/wh_vs_that_with_gap | blimp | wh_vs_that_with_gap | | blimp_hard | MultipleChoice | +| 214 | blimp/wh_questions_subject_gap_long_distance | blimp | wh_questions_subject_gap_long_distance | | blimp_hard | MultipleChoice | +| 215 | blimp/principle_A_reconstruction | blimp | principle_A_reconstruction | | blimp_hard | MultipleChoice | +| 216 | blimp/wh_questions_object_gap | blimp | wh_questions_object_gap | | blimp_hard | MultipleChoice | +| 217 | blimp/coordinate_structure_constraint_object_extraction | blimp | coordinate_structure_constraint_object_extraction | | blimp_hard | MultipleChoice | +| 218 | blimp/sentential_subject_island | blimp | sentential_subject_island | | blimp_hard | MultipleChoice | +| 219 | blimp/left_branch_island_echo_question | blimp | left_branch_island_echo_question | | blimp_hard | MultipleChoice | +| 220 | blimp/inchoative | blimp | inchoative | | blimp_hard | MultipleChoice | +| 221 | blimp/npi_present_2 | blimp | npi_present_2 | | blimp_hard | MultipleChoice | +| 222 | blimp/wh_vs_that_with_gap_long_distance | blimp | wh_vs_that_with_gap_long_distance | | blimp_hard | MultipleChoice | +| 223 | blimp/principle_A_c_command | blimp | principle_A_c_command | | blimp_hard | MultipleChoice | +| 224 | blimp/tough_vs_raising_1 | blimp | tough_vs_raising_1 | | blimp_hard | MultipleChoice | +| 225 | blimp/existential_there_quantifiers_2 | blimp | existential_there_quantifiers_2 | | blimp_hard | MultipleChoice | +| 226 | blimp/matrix_question_npi_licensor_present | blimp | matrix_question_npi_licensor_present | | blimp_hard | MultipleChoice | +| 227 | blimp/superlative_quantifiers_1 | blimp | superlative_quantifiers_1 | | blimp_hard | MultipleChoice | +| 228 | blimp/coordinate_structure_constraint_complex_left_branch | blimp | coordinate_structure_constraint_complex_left_branch | | blimp_hard | MultipleChoice | +| 229 | blimp/complex_NP_island | blimp | complex_NP_island | | blimp_hard | MultipleChoice | +| 230 | blimp/npi_present_1 | blimp | npi_present_1 | | blimp_hard | MultipleChoice | +| 231 | blimp/principle_A_domain_2 | blimp | principle_A_domain_2 | | blimp_hard | MultipleChoice | +| 232 | cos_e/v1.0 | cos_e | v1.0 | | cos_e | MultipleChoice | +| 233 | cosmos_qa | cosmos_qa | | | cosmos_qa | MultipleChoice | +| 234 | dream | dream | | | dream | MultipleChoice | +| 235 | openbookqa | openbookqa | | | openbookqa | MultipleChoice | +| 236 | qasc | qasc | | | qasc | MultipleChoice | +| 237 | quartz | quartz | | | quartz | MultipleChoice | +| 238 | quail | quail | | | quail | MultipleChoice | +| 239 | head_qa/en | head_qa | en | | head_qa___en | MultipleChoice | +| 240 | sciq | sciq | | | sciq | MultipleChoice | +| 241 | social_i_qa | social_i_qa | | | social_i_qa | MultipleChoice | +| 242 | wiki_hop | wiki_hop | | | wiki_hop | MultipleChoice | +| 243 | wiqa | wiqa | | | wiqa | MultipleChoice | +| 244 | piqa | piqa | | | piqa | MultipleChoice | +| 245 | hellaswag | hellaswag | | | hellaswag | MultipleChoice | +| 246 | super_glue/copa | super_glue | copa | | super_glue___copa | MultipleChoice | +| 247 | balanced-copa | pkavumba/balanced-copa | | | balanced_copa | MultipleChoice | +| 248 | art | art | | | art | MultipleChoice | +| 249 | hendrycks_test/miscellaneous | hendrycks_test | miscellaneous | | hendrycks_test | MultipleChoice | +| 250 | hendrycks_test/moral_disputes | hendrycks_test | moral_disputes | | hendrycks_test | MultipleChoice | +| 251 | hendrycks_test/moral_scenarios | hendrycks_test | moral_scenarios | | hendrycks_test | MultipleChoice | +| 252 | hendrycks_test/nutrition | hendrycks_test | nutrition | | hendrycks_test | MultipleChoice | +| 253 | hendrycks_test/philosophy | hendrycks_test | philosophy | | hendrycks_test | MultipleChoice | +| 254 | hendrycks_test/prehistory | hendrycks_test | prehistory | | hendrycks_test | MultipleChoice | +| 255 | hendrycks_test/professional_accounting | hendrycks_test | professional_accounting | | hendrycks_test | MultipleChoice | +| 256 | hendrycks_test/professional_law | hendrycks_test | professional_law | | hendrycks_test | MultipleChoice | +| 257 | hendrycks_test/public_relations | hendrycks_test | public_relations | | hendrycks_test | MultipleChoice | +| 258 | hendrycks_test/professional_psychology | hendrycks_test | professional_psychology | | hendrycks_test | MultipleChoice | +| 259 | hendrycks_test/security_studies | hendrycks_test | security_studies | | hendrycks_test | MultipleChoice | +| 260 | hendrycks_test/sociology | hendrycks_test | sociology | | hendrycks_test | MultipleChoice | +| 261 | hendrycks_test/us_foreign_policy | hendrycks_test | us_foreign_policy | | hendrycks_test | MultipleChoice | +| 262 | hendrycks_test/virology | hendrycks_test | virology | | hendrycks_test | MultipleChoice | +| 263 | hendrycks_test/world_religions | hendrycks_test | world_religions | | hendrycks_test | MultipleChoice | +| 264 | hendrycks_test/medical_genetics | hendrycks_test | medical_genetics | | hendrycks_test | MultipleChoice | +| 265 | hendrycks_test/professional_medicine | hendrycks_test | professional_medicine | | hendrycks_test | MultipleChoice | +| 266 | hendrycks_test/marketing | hendrycks_test | marketing | | hendrycks_test | MultipleChoice | +| 267 | hendrycks_test/econometrics | hendrycks_test | econometrics | | hendrycks_test | MultipleChoice | +| 268 | hendrycks_test/machine_learning | hendrycks_test | machine_learning | | hendrycks_test | MultipleChoice | +| 269 | hendrycks_test/global_facts | hendrycks_test | global_facts | | hendrycks_test | MultipleChoice | +| 270 | hendrycks_test/formal_logic | hendrycks_test | formal_logic | | hendrycks_test | MultipleChoice | +| 271 | hendrycks_test/elementary_mathematics | hendrycks_test | elementary_mathematics | | hendrycks_test | MultipleChoice | +| 272 | hendrycks_test/electrical_engineering | hendrycks_test | electrical_engineering | | hendrycks_test | MultipleChoice | +| 273 | hendrycks_test/conceptual_physics | hendrycks_test | conceptual_physics | | hendrycks_test | MultipleChoice | +| 274 | hendrycks_test/management | hendrycks_test | management | | hendrycks_test | MultipleChoice | +| 275 | hendrycks_test/college_physics | hendrycks_test | college_physics | | hendrycks_test | MultipleChoice | +| 276 | hendrycks_test/college_medicine | hendrycks_test | college_medicine | | hendrycks_test | MultipleChoice | +| 277 | hendrycks_test/college_mathematics | hendrycks_test | college_mathematics | | hendrycks_test | MultipleChoice | +| 278 | hendrycks_test/college_computer_science | hendrycks_test | college_computer_science | | hendrycks_test | MultipleChoice | +| 279 | hendrycks_test/college_chemistry | hendrycks_test | college_chemistry | | hendrycks_test | MultipleChoice | +| 280 | hendrycks_test/college_biology | hendrycks_test | college_biology | | hendrycks_test | MultipleChoice | +| 281 | hendrycks_test/clinical_knowledge | hendrycks_test | clinical_knowledge | | hendrycks_test | MultipleChoice | +| 282 | hendrycks_test/business_ethics | hendrycks_test | business_ethics | | hendrycks_test | MultipleChoice | +| 283 | hendrycks_test/astronomy | hendrycks_test | astronomy | | hendrycks_test | MultipleChoice | +| 284 | hendrycks_test/anatomy | hendrycks_test | anatomy | | hendrycks_test | MultipleChoice | +| 285 | hendrycks_test/abstract_algebra | hendrycks_test | abstract_algebra | | hendrycks_test | MultipleChoice | +| 286 | hendrycks_test/high_school_biology | hendrycks_test | high_school_biology | | hendrycks_test | MultipleChoice | +| 287 | hendrycks_test/high_school_chemistry | hendrycks_test | high_school_chemistry | | hendrycks_test | MultipleChoice | +| 288 | hendrycks_test/computer_security | hendrycks_test | computer_security | | hendrycks_test | MultipleChoice | +| 289 | hendrycks_test/jurisprudence | hendrycks_test | jurisprudence | | hendrycks_test | MultipleChoice | +| 290 | hendrycks_test/logical_fallacies | hendrycks_test | logical_fallacies | | hendrycks_test | MultipleChoice | +| 291 | hendrycks_test/international_law | hendrycks_test | international_law | | hendrycks_test | MultipleChoice | +| 292 | hendrycks_test/human_sexuality | hendrycks_test | human_sexuality | | hendrycks_test | MultipleChoice | +| 293 | hendrycks_test/human_aging | hendrycks_test | human_aging | | hendrycks_test | MultipleChoice | +| 294 | hendrycks_test/high_school_world_history | hendrycks_test | high_school_world_history | | hendrycks_test | MultipleChoice | +| 295 | hendrycks_test/high_school_us_history | hendrycks_test | high_school_us_history | | hendrycks_test | MultipleChoice | +| 296 | hendrycks_test/high_school_statistics | hendrycks_test | high_school_statistics | | hendrycks_test | MultipleChoice | +| 297 | hendrycks_test/high_school_physics | hendrycks_test | high_school_physics | | hendrycks_test | MultipleChoice | +| 298 | hendrycks_test/high_school_psychology | hendrycks_test | high_school_psychology | | hendrycks_test | MultipleChoice | +| 299 | hendrycks_test/high_school_mathematics | hendrycks_test | high_school_mathematics | | hendrycks_test | MultipleChoice | +| 300 | hendrycks_test/high_school_macroeconomics | hendrycks_test | high_school_macroeconomics | | hendrycks_test | MultipleChoice | +| 301 | hendrycks_test/high_school_government_and_politics | hendrycks_test | high_school_government_and_politics | | hendrycks_test | MultipleChoice | +| 302 | hendrycks_test/high_school_geography | hendrycks_test | high_school_geography | | hendrycks_test | MultipleChoice | +| 303 | hendrycks_test/high_school_european_history | hendrycks_test | high_school_european_history | | hendrycks_test | MultipleChoice | +| 304 | hendrycks_test/high_school_computer_science | hendrycks_test | high_school_computer_science | | hendrycks_test | MultipleChoice | +| 305 | hendrycks_test/high_school_microeconomics | hendrycks_test | high_school_microeconomics | | hendrycks_test | MultipleChoice | +| 306 | winogrande/winogrande_xl | winogrande | winogrande_xl | | winogrande | MultipleChoice | +| 307 | codah/codah | codah | codah | | codah | MultipleChoice | +| 308 | ai2_arc/ARC-Challenge/challenge | ai2_arc | ARC-Challenge | challenge | ai2_arc__challenge | MultipleChoice | +| 309 | ai2_arc/ARC-Easy/challenge | ai2_arc | ARC-Easy | challenge | ai2_arc__challenge | MultipleChoice | +| 310 | definite_pronoun_resolution | definite_pronoun_resolution | | | definite_pronoun_resolution | MultipleChoice | +| 311 | swag | swag | | | swag | MultipleChoice | +| 312 | math_qa | math_qa | | | math_qa | MultipleChoice | +| 313 | utilitarianism | metaeval/utilitarianism | | | utilitarianism | Classification | +| 314 | amazon_counterfactual/en | mteb/amazon_counterfactual | en | | amazon_counterfactual | Classification | +| 315 | insincere-questions | SetFit/insincere-questions | | | insincere_questions | Classification | +| 316 | toxic_conversations | SetFit/toxic_conversations | | | toxic_conversations | Classification | +| 317 | TuringBench | turingbench/TuringBench | | | turingbench | Classification | +| 318 | trec | trec | | | trec | Classification | +| 319 | vitaminc/tals--vitaminc | tals/vitaminc | tals--vitaminc | | tals_vitaminc | Classification | +| 320 | hope_edi/english | hope_edi | english | | hope_edi | Classification | +| 321 | rumoureval_2019/RumourEval2019 | strombergnlp/rumoureval_2019 | RumourEval2019 | | rumoureval_2019 | Classification | +| 322 | ethos/binary | ethos | binary | | ethos___binary | Classification | +| 323 | ethos/multilabel | ethos | multilabel | | ethos___multilabel | Classification | +| 324 | glue/cola | glue | cola | | glue___cola | Classification | +| 325 | glue/sst2 | glue | sst2 | | glue___sst2 | Classification | +| 326 | glue/mrpc | glue | mrpc | | glue___mrpc | Classification | +| 327 | glue/qqp | glue | qqp | | glue___qqp | Classification | +| 328 | glue/stsb | glue | stsb | | glue___stsb | Classification | +| 329 | glue/mnli | glue | mnli | | glue___mnli | Classification | +| 330 | glue/qnli | glue | qnli | | glue___qnli | Classification | +| 331 | glue/rte | glue | rte | | glue___rte | Classification | +| 332 | glue/wnli | glue | wnli | | glue___wnli | Classification | +| 333 | super_glue/boolq | super_glue | boolq | | super_glue___boolq | Classification | +| 334 | super_glue/cb | super_glue | cb | | super_glue___cb | Classification | +| 335 | super_glue/multirc | super_glue | multirc | | super_glue___multirc | Classification | +| 336 | super_glue/wic | super_glue | wic | | super_glue___wic | Classification | +| 337 | super_glue/axg | super_glue | axg | | super_glue___axg | Classification | +| 338 | tweet_eval/irony | tweet_eval | irony | | tweet_eval | Classification | +| 339 | tweet_eval/offensive | tweet_eval | offensive | | tweet_eval | Classification | +| 340 | tweet_eval/stance_atheism | tweet_eval | stance_atheism | | tweet_eval | Classification | +| 341 | tweet_eval/hate | tweet_eval | hate | | tweet_eval | Classification | +| 342 | tweet_eval/emotion | tweet_eval | emotion | | tweet_eval | Classification | +| 343 | tweet_eval/emoji | tweet_eval | emoji | | tweet_eval | Classification | +| 344 | tweet_eval/sentiment | tweet_eval | sentiment | | tweet_eval | Classification | +| 345 | tweet_eval/stance_climate | tweet_eval | stance_climate | | tweet_eval | Classification | +| 346 | tweet_eval/stance_hillary | tweet_eval | stance_hillary | | tweet_eval | Classification | +| 347 | tweet_eval/stance_feminist | tweet_eval | stance_feminist | | tweet_eval | Classification | +| 348 | tweet_eval/stance_abortion | tweet_eval | stance_abortion | | tweet_eval | Classification | +| 349 | discovery/discovery | discovery | discovery | | discovery | Classification | +| 350 | pragmeval/squinky-informativeness | pragmeval | squinky-informativeness | | pragmeval_1 | Classification | +| 351 | pragmeval/switchboard | pragmeval | switchboard | | pragmeval_1 | Classification | +| 352 | pragmeval/mrda | pragmeval | mrda | | pragmeval_1 | Classification | +| 353 | pragmeval/squinky-implicature | pragmeval | squinky-implicature | | pragmeval_1 | Classification | +| 354 | pragmeval/emobank-arousal | pragmeval | emobank-arousal | | pragmeval_1 | Classification | +| 355 | pragmeval/emobank-dominance | pragmeval | emobank-dominance | | pragmeval_1 | Classification | +| 356 | pragmeval/squinky-formality | pragmeval | squinky-formality | | pragmeval_1 | Classification | +| 357 | pragmeval/verifiability | pragmeval | verifiability | | pragmeval_1 | Classification | +| 358 | pragmeval/emobank-valence | pragmeval | emobank-valence | | pragmeval_1 | Classification | +| 359 | pragmeval/persuasiveness-premisetype | pragmeval | persuasiveness-premisetype | | pragmeval_2 | Classification | +| 360 | pragmeval/emergent | pragmeval | emergent | | pragmeval_2 | Classification | +| 361 | pragmeval/gum | pragmeval | gum | | pragmeval_2 | Classification | +| 362 | pragmeval/pdtb | pragmeval | pdtb | | pragmeval_2 | Classification | +| 363 | pragmeval/persuasiveness-strength | pragmeval | persuasiveness-strength | | pragmeval_2 | Classification | +| 364 | pragmeval/sarcasm | pragmeval | sarcasm | | pragmeval_2 | Classification | +| 365 | pragmeval/persuasiveness-specificity | pragmeval | persuasiveness-specificity | | pragmeval_2 | Classification | +| 366 | pragmeval/persuasiveness-claimtype | pragmeval | persuasiveness-claimtype | | pragmeval_2 | Classification | +| 367 | pragmeval/stac | pragmeval | stac | | pragmeval_2 | Classification | +| 368 | pragmeval/persuasiveness-eloquence | pragmeval | persuasiveness-eloquence | | pragmeval_2 | Classification | +| 369 | pragmeval/persuasiveness-relevance | pragmeval | persuasiveness-relevance | | pragmeval_2 | Classification | +| 370 | silicone/maptask | silicone | maptask | | silicone | Classification | +| 371 | silicone/dyda_e | silicone | dyda_e | | silicone | Classification | +| 372 | silicone/meld_e | silicone | meld_e | | silicone | Classification | +| 373 | silicone/meld_s | silicone | meld_s | | silicone | Classification | +| 374 | silicone/oasis | silicone | oasis | | silicone | Classification | +| 375 | silicone/sem | silicone | sem | | silicone | Classification | +| 376 | silicone/dyda_da | silicone | dyda_da | | silicone | Classification | +| 377 | silicone/iemocap | silicone | iemocap | | silicone | Classification | +| 378 | lex_glue/eurlex | lex_glue | eurlex | | lex_glue___eurlex | Classification | +| 379 | lex_glue/scotus | lex_glue | scotus | | lex_glue___scotus | Classification | +| 380 | lex_glue/ledgar | lex_glue | ledgar | | lex_glue___ledgar | Classification | +| 381 | lex_glue/unfair_tos | lex_glue | unfair_tos | | lex_glue___unfair_tos | Classification | +| 382 | lex_glue/case_hold | lex_glue | case_hold | | lex_glue___case_hold | MultipleChoice | +| 383 | language-identification | papluca/language-identification | | | language_identification | Classification | +| 384 | imdb | imdb | | | imdb | Classification | +| 385 | rotten_tomatoes | rotten_tomatoes | | | rotten_tomatoes | Classification | +| 386 | ag_news | ag_news | | | ag_news | Classification | +| 387 | yelp_review_full/yelp_review_full | yelp_review_full | yelp_review_full | | yelp_review_full | Classification | +| 388 | financial_phrasebank/sentences_allagree | financial_phrasebank | sentences_allagree | | financial_phrasebank | Classification | +| 389 | poem_sentiment | poem_sentiment | | | poem_sentiment | Classification | +| 390 | dbpedia_14/dbpedia_14 | dbpedia_14 | dbpedia_14 | | dbpedia_14 | Classification | +| 391 | amazon_polarity/amazon_polarity | amazon_polarity | amazon_polarity | | amazon_polarity | Classification | +| 392 | app_reviews | app_reviews | | | app_reviews | Classification | +| 393 | hate_speech18 | hate_speech18 | | | hate_speech18 | Classification | +| 394 | sms_spam | sms_spam | | | sms_spam | Classification | +| 395 | humicroedit/subtask-1 | humicroedit | subtask-1 | | humicroedit___subtask_1 | Classification | +| 396 | humicroedit/subtask-2 | humicroedit | subtask-2 | | humicroedit___subtask_2 | Classification | +| 397 | snips_built_in_intents | snips_built_in_intents | | | snips_built_in_intents | Classification | +| 398 | banking77 | banking77 | | | banking77 | Classification | +| 399 | hate_speech_offensive | hate_speech_offensive | | | hate_speech_offensive | Classification | +| 400 | yahoo_answers_topics/yahoo_answers_topics | yahoo_answers_topics | yahoo_answers_topics | | yahoo_answers_topics | Classification | +| 401 | stackoverflow-questions | pacovaldez/stackoverflow-questions | | | stackoverflow_questions | Classification | +| 402 | hyperpartisan_news | zapsdcn/hyperpartisan_news | | | hyperpartisan_news | Classification | +| 403 | sciie | zapsdcn/sciie | | | scierc | Classification | +| 404 | citation_intent | zapsdcn/citation_intent | | | citation_intent | Classification | +| 405 | go_emotions/simplified | go_emotions | simplified | | go_emotions___simplified | Classification | +| 406 | scicite | scicite | | | scicite | Classification | +| 407 | liar | liar | | | liar | Classification | +| 408 | lexical_relation_classification/BLESS | relbert/lexical_relation_classification | BLESS | | relbert_lexical_relation_classification | Classification | +| 409 | lexical_relation_classification/ROOT09 | relbert/lexical_relation_classification | ROOT09 | | relbert_lexical_relation_classification | Classification | +| 410 | lexical_relation_classification/K&H+N | relbert/lexical_relation_classification | K&H+N | | relbert_lexical_relation_classification | Classification | +| 411 | lexical_relation_classification/EVALution | relbert/lexical_relation_classification | EVALution | | relbert_lexical_relation_classification | Classification | +| 412 | lexical_relation_classification/CogALexV | relbert/lexical_relation_classification | CogALexV | | relbert_lexical_relation_classification | Classification | +| 413 | linguisticprobing/obj_number | metaeval/linguisticprobing | obj_number | | metaeval_linguisticprobing | Classification | +| 414 | linguisticprobing/word_content | metaeval/linguisticprobing | word_content | | metaeval_linguisticprobing | Classification | +| 415 | linguisticprobing/past_present | metaeval/linguisticprobing | past_present | | metaeval_linguisticprobing | Classification | +| 416 | linguisticprobing/sentence_length | metaeval/linguisticprobing | sentence_length | | metaeval_linguisticprobing | Classification | +| 417 | linguisticprobing/top_constituents | metaeval/linguisticprobing | top_constituents | | metaeval_linguisticprobing | Classification | +| 418 | linguisticprobing/tree_depth | metaeval/linguisticprobing | tree_depth | | metaeval_linguisticprobing | Classification | +| 419 | linguisticprobing/coordination_inversion | metaeval/linguisticprobing | coordination_inversion | | metaeval_linguisticprobing | Classification | +| 420 | linguisticprobing/odd_man_out | metaeval/linguisticprobing | odd_man_out | | metaeval_linguisticprobing | Classification | +| 421 | linguisticprobing/bigram_shift | metaeval/linguisticprobing | bigram_shift | | metaeval_linguisticprobing | Classification | +| 422 | linguisticprobing/subj_number | metaeval/linguisticprobing | subj_number | | metaeval_linguisticprobing | Classification | +| 423 | crowdflower/sentiment_nuclear_power | metaeval/crowdflower | sentiment_nuclear_power | | metaeval_crowdflower | Classification | +| 424 | crowdflower/tweet_global_warming | metaeval/crowdflower | tweet_global_warming | | metaeval_crowdflower | Classification | +| 425 | crowdflower/political-media-message | metaeval/crowdflower | political-media-message | | metaeval_crowdflower | Classification | +| 426 | crowdflower/corporate-messaging | metaeval/crowdflower | corporate-messaging | | metaeval_crowdflower | Classification | +| 427 | crowdflower/airline-sentiment | metaeval/crowdflower | airline-sentiment | | metaeval_crowdflower | Classification | +| 428 | crowdflower/political-media-bias | metaeval/crowdflower | political-media-bias | | metaeval_crowdflower | Classification | +| 429 | crowdflower/text_emotion | metaeval/crowdflower | text_emotion | | metaeval_crowdflower | Classification | +| 430 | crowdflower/economic-news | metaeval/crowdflower | economic-news | | metaeval_crowdflower | Classification | +| 431 | crowdflower/political-media-audience | metaeval/crowdflower | political-media-audience | | metaeval_crowdflower | Classification | +| 432 | ethics/commonsense | metaeval/ethics | commonsense | | metaeval_ethics___commonsense | Classification | +| 433 | ethics/deontology | metaeval/ethics | deontology | | metaeval_ethics___deontology | Classification | +| 434 | ethics/justice | metaeval/ethics | justice | | metaeval_ethics___justice | Classification | +| 435 | ethics/virtue | metaeval/ethics | virtue | | metaeval_ethics___virtue | Classification | +| 436 | emo/emo2019 | emo | emo2019 | | emo | Classification | +| 437 | google_wellformed_query | google_wellformed_query | | | google_wellformed_query | Classification | +| 438 | tweets_hate_speech_detection | tweets_hate_speech_detection | | | tweets_hate_speech_detection | Classification | +| 439 | adv_glue/adv_sst2 | adv_glue | adv_sst2 | | adv_glue___adv_sst2 | Classification | +| 440 | adv_glue/adv_qqp | adv_glue | adv_qqp | | adv_glue___adv_qqp | Classification | +| 441 | adv_glue/adv_mnli | adv_glue | adv_mnli | | adv_glue___adv_mnli | Classification | +| 442 | adv_glue/adv_mnli_mismatched | adv_glue | adv_mnli_mismatched | | adv_glue___adv_mnli_mismatched | Classification | +| 443 | adv_glue/adv_qnli | adv_glue | adv_qnli | | adv_glue___adv_qnli | Classification | +| 444 | adv_glue/adv_rte | adv_glue | adv_rte | | adv_glue___adv_rte | Classification | +| 445 | has_part | has_part | | | has_part | Classification | +| 446 | wnut_17/wnut_17 | wnut_17 | wnut_17 | | wnut_17 | TokenClassification | +| 447 | ncbi_disease/ncbi_disease | ncbi_disease | ncbi_disease | | ncbi_disease | TokenClassification | +| 448 | acronym_identification | acronym_identification | | | acronym_identification | TokenClassification | +| 449 | jnlpba/jnlpba | jnlpba | jnlpba | | jnlpba | TokenClassification | +| 450 | species_800/species_800 | species_800 | species_800 | | species_800 | TokenClassification | +| 451 | ontonotes_english/SpeedOfMagic--ontonotes_english | SpeedOfMagic/ontonotes_english | SpeedOfMagic--ontonotes_english | | SpeedOfMagic_ontonotes_english | TokenClassification | +| 452 | blog_authorship_corpus/gender | blog_authorship_corpus | | gender | blog_authorship_corpus__gender | Classification | +| 453 | blog_authorship_corpus/age | blog_authorship_corpus | | age | blog_authorship_corpus__age | Classification | +| 454 | blog_authorship_corpus/horoscope | blog_authorship_corpus | | horoscope | blog_authorship_corpus__horoscope | Classification | +| 455 | blog_authorship_corpus/job | blog_authorship_corpus | | job | blog_authorship_corpus__job | Classification | +| 456 | open_question_type | launch/open_question_type | | | launch_open_question_type | Classification | +| 457 | health_fact | health_fact | | | health_fact | Classification | +| 458 | commonsense_qa | commonsense_qa | | | commonsense_qa | MultipleChoice | +| 459 | mc_taco | mc_taco | | | mc_taco | Classification | +| 460 | ade_corpus_v2/Ade_corpus_v2_classification | ade_corpus_v2 | Ade_corpus_v2_classification | | ade_corpus_v2___Ade_corpus_v2_classification | Classification | +| 461 | discosense | prajjwal1/discosense | | | discosense | MultipleChoice | +| 462 | circa | circa | | | circa | Classification | +| 463 | code_x_glue_cc_defect_detection | code_x_glue_cc_defect_detection | | | code_x_glue_cc_defect_detection | Classification | +| 464 | code_x_glue_cc_code_refinement/medium | code_x_glue_cc_code_refinement | medium | | code_x_glue_cc_code_refinement | MultipleChoice | +| 465 | EffectiveFeedbackStudentWriting | YaHi/EffectiveFeedbackStudentWriting | | | effective_feedback_student_writing | Classification | +| 466 | promptSentiment | Ericwang/promptSentiment | | | promptSentiment | Classification | +| 467 | promptNLI | Ericwang/promptNLI | | | promptNLI | Classification | +| 468 | promptSpoke | Ericwang/promptSpoke | | | promptSpoke | Classification | +| 469 | promptProficiency | Ericwang/promptProficiency | | | promptProficiency | Classification | +| 470 | promptGrammar | Ericwang/promptGrammar | | | promptGrammar | Classification | +| 471 | promptCoherence | Ericwang/promptCoherence | | | promptCoherence | Classification | +| 472 | phrase_similarity | PiC/phrase_similarity | | | phrase_similarity | Classification | +| 473 | scientific-exaggeration-detection | copenlu/scientific-exaggeration-detection | | | exaggeration_detection | Classification | +| 474 | quarel | quarel | | | quarel | Classification | +| 475 | fever-evidence-related/mwong--fever-related | mwong/fever-evidence-related | mwong--fever-related | | mwong_fever_evidence_related | Classification | +| 476 | numer_sense | numer_sense | | | numer_sense | Classification | +| 477 | dynasent/dynabench.dynasent.r1.all/r1 | dynabench/dynasent | dynabench.dynasent.r1.all | r1 | dynasent__r1 | Classification | +| 478 | dynasent/dynabench.dynasent.r2.all/r2 | dynabench/dynasent | dynabench.dynasent.r2.all | r2 | dynasent__r2 | Classification | +| 479 | Sarcasm_News_Headline | raquiba/Sarcasm_News_Headline | | | sarcasm_news | Classification | +| 480 | sem_eval_2010_task_8 | sem_eval_2010_task_8 | | | sem_eval_2010_task_8 | Classification | +| 481 | auditor_review/demo-org--auditor_review | demo-org/auditor_review | demo-org--auditor_review | | demo_org_auditor_review | Classification | +| 482 | medmcqa | medmcqa | | | medmcqa | MultipleChoice | +| 483 | aqua_rat/tokenized | aqua_rat | tokenized | | aqua_rat___tokenized | MultipleChoice |