-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
312 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,308 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 35, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import scipy\n", | ||
"import statsmodels.api as sm\n", | ||
"import statsmodels.formula.api as smf" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"{'binding-reconstruction', 'npi-sent-neg', 'npi-only', 'binding-case', 'passive', 'superlative-quantifier', 'pp-mod-subj', 'existential-there-quantifier', 'det-adj-noun', 'full', 'det-noun', 're-irr-sv-agr', 'binding-domain', 'rel-cl', 'binding-c-command', 'npi-sim-ques'}\n", | ||
"{'determiner_noun_agreement_1', 'principle_A_domain_1', 'determiner_noun_agreement_with_adjective_1', 'determiner_noun_agreement_with_adj_irregular_2', 'sentential_negation_npi_licensor_present', 'regular_plural_subject_verb_agreement_1', 'principle_A_case_2', 'passive_1', 'determiner_noun_agreement_irregular_2', 'regular_plural_subject_verb_agreement_2', 'principle_A_c_command', 'determiner_noun_agreement_with_adj_irregular_1', 'matrix_question_npi_licensor_present', 'only_npi_scope', 'sentential_negation_npi_scope', 'determiner_noun_agreement_with_adj_2', 'principle_A_case_1', 'principle_A_domain_3', 'existential_there_quantifiers_1', 'passive_2', 'superlative_quantifiers_2', 'irregular_plural_subject_verb_agreement_2', 'principle_A_reconstruction', 'superlative_quantifiers_1', 'determiner_noun_agreement_irregular_1', 'only_npi_licensor_present', 'principle_A_domain_2', 'distractor_agreement_relational_noun', 'irregular_plural_subject_verb_agreement_1', 'determiner_noun_agreement_2', 'distractor_agreement_relative_clause'}\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"blimp_corpus_map = {\n", | ||
" \"full\": [],\n", | ||
" \"pp-mod-subj\": [\"distractor_agreement_relational_noun\"],\n", | ||
" \"rel-cl\": [\"distractor_agreement_relative_clause\"],\n", | ||
" \"re-irr-sv-agr\": [\n", | ||
" \"irregular_plural_subject_verb_agreement_1\",\n", | ||
" \"irregular_plural_subject_verb_agreement_2\",\n", | ||
" \"regular_plural_subject_verb_agreement_1\",\n", | ||
" \"regular_plural_subject_verb_agreement_2\",\n", | ||
" ],\n", | ||
" \"npi-only\": [\"only_npi_licensor_present\", \"only_npi_scope\"],\n", | ||
" \"npi-sent-neg\": [\n", | ||
" \"sentential_negation_npi_licensor_present\",\n", | ||
" \"sentential_negation_npi_scope\",\n", | ||
" ],\n", | ||
" \"npi-sim-ques\": [\"matrix_question_npi_licensor_present\"],\n", | ||
" \"superlative-quantifier\": [\n", | ||
" \"superlative_quantifiers_1\",\n", | ||
" \"superlative_quantifiers_2\",\n", | ||
" ],\n", | ||
" \"existential-there-quantifier\": [\"existential_there_quantifiers_1\"],\n", | ||
" \"binding-c-command\": [\"principle_A_c_command\"],\n", | ||
" \"binding-case\": [\"principle_A_case_1\", \"principle_A_case_2\"],\n", | ||
" \"binding-domain\": [\n", | ||
" \"principle_A_domain_1\",\n", | ||
" \"principle_A_domain_2\",\n", | ||
" \"principle_A_domain_3\",\n", | ||
" ],\n", | ||
" \"binding-reconstruction\": [\"principle_A_reconstruction\"],\n", | ||
" \"passive\": [\"passive_1\", \"passive_2\"],\n", | ||
" \"det-adj-noun\": [\n", | ||
" \"determiner_noun_agreement_with_adjective_1\",\n", | ||
" \"determiner_noun_agreement_with_adj_2\",\n", | ||
" \"determiner_noun_agreement_with_adj_irregular_1\",\n", | ||
" \"determiner_noun_agreement_with_adj_irregular_2\",\n", | ||
" ],\n", | ||
" \"det-noun\": [\n", | ||
" \"determiner_noun_agreement_1\",\n", | ||
" \"determiner_noun_agreement_2\",\n", | ||
" \"determiner_noun_agreement_irregular_1\",\n", | ||
" \"determiner_noun_agreement_irregular_2\",\n", | ||
" ],\n", | ||
"}\n", | ||
"all_filters = set(blimp_corpus_map.keys())\n", | ||
"benchmarks_with_filters = set([_ for xs in blimp_corpus_map.values() for _ in xs])\n", | ||
"print(all_filters)\n", | ||
"print(benchmarks_with_filters)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Read the main data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 22, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Index(['corpus', 'arch', 'seed', 'validation_loss', 'validation_ppl',\n", | ||
" 'test_loss', 'test_ppl', 'blimp_benchmark', 'blimp_acc',\n", | ||
" 'filter_target', 'z_validation_ppl', 'z_validation_loss', 'z_test_ppl',\n", | ||
" 'z_test_loss', 'full_same_seed_acc', 'blimp_delta_same_seed',\n", | ||
" 'full_all_seed_avg_acc', 'blimp_delta', 'field', 'linguistics_term',\n", | ||
" 'corpus_tokens'],\n", | ||
" dtype='object')\n", | ||
"Unexpected exception formatting exception. Falling back to standard exception\n" | ||
] | ||
}, | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"Traceback (most recent call last):\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/formatters.py\", line 223, in catch_format_error\n", | ||
" r = method(self, *args, **kwargs)\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/formatters.py\", line 344, in __call__\n", | ||
" return method()\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/core/frame.py\", line 1106, in _repr_html_\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/io/formats/format.py\", line 1110, in to_html\n", | ||
" When formatting an Index subclass\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/io/formats/html.py\", line 88, in to_string\n", | ||
" lines = self.render()\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/io/formats/html.py\", line 644, in render\n", | ||
" super().render()\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/io/formats/html.py\", line 94, in render\n", | ||
" self._write_table()\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/io/formats/html.py\", line 267, in _write_table\n", | ||
" self._write_header(indent + self.indent_delta)\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/io/formats/html.py\", line 403, in _write_header\n", | ||
" self._write_col_header(indent + self.indent_delta)\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/io/formats/html.py\", line 383, in _write_col_header\n", | ||
" row.extend(self._get_columns_formatted_values())\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/io/formats/html.py\", line 611, in _get_columns_formatted_values\n", | ||
" return self.columns._format_flat(include_name=False)\n", | ||
"AttributeError: 'Index' object has no attribute '_format_flat'\n", | ||
"\n", | ||
"During handling of the above exception, another exception occurred:\n", | ||
"\n", | ||
"Traceback (most recent call last):\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/interactiveshell.py\", line 2102, in showtraceback\n", | ||
" stb = self.InteractiveTB.structured_traceback(\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/ultratb.py\", line 1310, in structured_traceback\n", | ||
" return FormattedTB.structured_traceback(\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/ultratb.py\", line 1199, in structured_traceback\n", | ||
" return VerboseTB.structured_traceback(\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/ultratb.py\", line 1052, in structured_traceback\n", | ||
" formatted_exception = self.format_exception_as_a_whole(etype, evalue, etb, number_of_lines_of_context,\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/ultratb.py\", line 978, in format_exception_as_a_whole\n", | ||
" frames.append(self.format_record(record))\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/ultratb.py\", line 878, in format_record\n", | ||
" frame_info.lines, Colors, self.has_colors, lvals\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/ultratb.py\", line 712, in lines\n", | ||
" return self._sd.lines\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/stack_data/utils.py\", line 144, in cached_property_wrapper\n", | ||
" value = obj.__dict__[self.func.__name__] = self.func(obj)\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/stack_data/core.py\", line 734, in lines\n", | ||
" pieces = self.included_pieces\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/stack_data/utils.py\", line 144, in cached_property_wrapper\n", | ||
" value = obj.__dict__[self.func.__name__] = self.func(obj)\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/stack_data/core.py\", line 681, in included_pieces\n", | ||
" pos = scope_pieces.index(self.executing_piece)\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/stack_data/utils.py\", line 144, in cached_property_wrapper\n", | ||
" value = obj.__dict__[self.func.__name__] = self.func(obj)\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/stack_data/core.py\", line 660, in executing_piece\n", | ||
" return only(\n", | ||
" File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/executing/executing.py\", line 116, in only\n", | ||
" raise NotOneValueFound('Expected one value, found 0')\n", | ||
"executing.executing.NotOneValueFound: Expected one value, found 0\n" | ||
] | ||
}, | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
" corpus arch seed validation_loss validation_ppl test_loss \\\n", | ||
"0 full transformer 0 3.841752 46.607061 3.845174 \n", | ||
"1 full transformer 1 3.840523 46.549825 3.844156 \n", | ||
"2 full transformer 2 3.840050 46.527789 3.843551 \n", | ||
"3 full transformer 3 3.841511 46.595850 3.844735 \n", | ||
"4 full transformer 4 3.842284 46.631870 3.846043 \n", | ||
"... ... ... ... ... ... ... \n", | ||
"10715 passive lstm 0 3.975108 53.255876 3.977766 \n", | ||
"10716 passive lstm 1 3.975204 53.261006 3.978100 \n", | ||
"10717 passive lstm 2 3.974197 53.207382 3.977306 \n", | ||
"10718 passive lstm 3 3.975786 53.291974 3.978186 \n", | ||
"10719 passive lstm 4 3.977474 53.382045 3.980730 \n", | ||
"\n", | ||
" test_ppl blimp_benchmark blimp_acc filter_target \\\n", | ||
"0 46.766802 adjunct_island 0.726 False \n", | ||
"1 46.719216 adjunct_island 0.654 False \n", | ||
"2 46.690965 adjunct_island 0.668 False \n", | ||
"3 46.746313 adjunct_island 0.814 False \n", | ||
"4 46.807473 adjunct_island 0.665 False \n", | ||
"... ... ... ... ... \n", | ||
"10715 53.397600 wh_vs_that_with_gap_long_distance 0.137 False \n", | ||
"10716 53.415426 wh_vs_that_with_gap_long_distance 0.164 False \n", | ||
"10717 53.373047 wh_vs_that_with_gap_long_distance 0.121 False \n", | ||
"10718 53.420024 wh_vs_that_with_gap_long_distance 0.091 False \n", | ||
"10719 53.556119 wh_vs_that_with_gap_long_distance 0.125 False \n", | ||
"\n", | ||
" ... z_validation_loss z_test_ppl z_test_loss full_same_seed_acc \\\n", | ||
"0 ... -1.096013 -1.089433 -1.096798 0.726 \n", | ||
"1 ... -1.114756 -1.103922 -1.112395 0.654 \n", | ||
"2 ... -1.121978 -1.112524 -1.121662 0.668 \n", | ||
"3 ... -1.099682 -1.095671 -1.103512 0.814 \n", | ||
"4 ... -1.087896 -1.077049 -1.083481 0.665 \n", | ||
"... ... ... ... ... ... \n", | ||
"10715 ... 0.938086 0.929552 0.934563 0.144 \n", | ||
"10716 ... 0.939555 0.934980 0.939677 0.139 \n", | ||
"10717 ... 0.924191 0.922076 0.927517 0.161 \n", | ||
"10718 ... 0.948421 0.936380 0.940995 0.156 \n", | ||
"10719 ... 0.974180 0.977819 0.979977 0.087 \n", | ||
"\n", | ||
" blimp_delta_same_seed full_all_seed_avg_acc blimp_delta field \\\n", | ||
"0 0.000 0.7054 0.0206 syntax \n", | ||
"1 0.000 0.7054 -0.0514 syntax \n", | ||
"2 0.000 0.7054 -0.0374 syntax \n", | ||
"3 0.000 0.7054 0.1086 syntax \n", | ||
"4 0.000 0.7054 -0.0404 syntax \n", | ||
"... ... ... ... ... \n", | ||
"10715 -0.007 0.1374 -0.0004 syntax \n", | ||
"10716 0.025 0.1374 0.0266 syntax \n", | ||
"10717 -0.040 0.1374 -0.0164 syntax \n", | ||
"10718 -0.065 0.1374 -0.0464 syntax \n", | ||
"10719 0.038 0.1374 -0.0124 syntax \n", | ||
"\n", | ||
" linguistics_term corpus_tokens \n", | ||
"0 island_effects 66442068 \n", | ||
"1 island_effects 66442068 \n", | ||
"2 island_effects 66442068 \n", | ||
"3 island_effects 66442068 \n", | ||
"4 island_effects 66442068 \n", | ||
"... ... ... \n", | ||
"10715 filler_gap_dependency 66155000 \n", | ||
"10716 filler_gap_dependency 66155000 \n", | ||
"10717 filler_gap_dependency 66155000 \n", | ||
"10718 filler_gap_dependency 66155000 \n", | ||
"10719 filler_gap_dependency 66155000 \n", | ||
"\n", | ||
"[10720 rows x 21 columns]" | ||
] | ||
}, | ||
"execution_count": 22, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"main_data = pd.read_csv(\"data/tidy_results.csv\", index_col=0)\n", | ||
"main_data = main_data.rename(columns = {\"blimp_delta_all_seed_avg\": \"blimp_delta\"})\n", | ||
"print(main_data.columns)\n", | ||
"main_data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 36, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"arch\n", | ||
"lstm 53.403396\n", | ||
"transformer 46.970552\n", | ||
"Name: validation_ppl, dtype: float64\n", | ||
"TtestResult(statistic=271.4049864115043, pvalue=4.192319978874203e-29, df=15)\n" | ||
] | ||
}, | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"/var/folders/7b/186zvw415gldw_ktxf30pyzc0000gn/T/ipykernel_33594/3779774374.py:9: FutureWarning: Using the level keyword in DataFrame and Series aggregations is deprecated and will be removed in a future version. Use groupby instead. df.median(level=1) should use df.groupby(level=1).median().\n", | ||
" print(ppls_by_arch.mean(level=\"arch\"))\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# group data by corpus and architecture, take the mean val ppl across seeds\n", | ||
"ppls_by_arch = main_data.groupby([\"corpus\", \"arch\"])[\"validation_ppl\"].mean()\n", | ||
"# get the difference between lstm and transformer for each corpus\n", | ||
"ppl_unstacked = ppls_by_arch.unstack(level=\"arch\")\n", | ||
"\n", | ||
"# print the mean ppl for each architecture\n", | ||
"print(ppls_by_arch.mean(level=\"arch\"))\n", | ||
"print(scipy.stats.ttest_rel(ppl_unstacked[\"lstm\"], ppl_unstacked[\"transformer\"]))\n" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "corpus-filtering", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.16" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |