diff --git a/README.md b/README.md
index 59205cb..ebb66f9 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,9 @@
 # corpus-filtering
 
+## TODOs
+
+- [ ] Add `minicons`, `ipykernel`, and `statsmodels` to the project's environment specification
+
 ## Development & Contribution Guidelines
 
 ### Basic Setup
diff --git a/results/analysis.ipynb b/results/analysis.ipynb
new file mode 100644
index 0000000..195611b
--- /dev/null
+++ b/results/analysis.ipynb
@@ -0,0 +1,308 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import scipy\n",
+    "import statsmodels.api as sm\n",
+    "import statsmodels.formula.api as smf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'binding-reconstruction', 'npi-sent-neg', 'npi-only', 'binding-case', 'passive', 'superlative-quantifier', 'pp-mod-subj', 'existential-there-quantifier', 'det-adj-noun', 'full', 'det-noun', 're-irr-sv-agr', 'binding-domain', 'rel-cl', 'binding-c-command', 'npi-sim-ques'}\n",
+      "{'determiner_noun_agreement_1', 'principle_A_domain_1', 'determiner_noun_agreement_with_adjective_1', 'determiner_noun_agreement_with_adj_irregular_2', 'sentential_negation_npi_licensor_present', 'regular_plural_subject_verb_agreement_1', 'principle_A_case_2', 'passive_1', 'determiner_noun_agreement_irregular_2', 'regular_plural_subject_verb_agreement_2', 'principle_A_c_command', 'determiner_noun_agreement_with_adj_irregular_1', 'matrix_question_npi_licensor_present', 'only_npi_scope', 'sentential_negation_npi_scope', 'determiner_noun_agreement_with_adj_2', 'principle_A_case_1', 'principle_A_domain_3', 'existential_there_quantifiers_1', 'passive_2', 'superlative_quantifiers_2', 'irregular_plural_subject_verb_agreement_2', 'principle_A_reconstruction', 'superlative_quantifiers_1', 'determiner_noun_agreement_irregular_1', 'only_npi_licensor_present', 'principle_A_domain_2', 'distractor_agreement_relational_noun', 'irregular_plural_subject_verb_agreement_1', 'determiner_noun_agreement_2', 'distractor_agreement_relative_clause'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Filter (corpus) name -> list of BLiMP benchmark names paired with that filter\n",
+    "# (presumably the benchmarks each filter targets -- confirm against the data).\n",
+    "# The unfiltered \"full\" corpus has no paired benchmarks.\n",
+    "blimp_corpus_map = {\n",
+    "    \"full\": [],\n",
+    "    # agreement across an intervening phrase / subject-verb agreement\n",
+    "    \"pp-mod-subj\": [\"distractor_agreement_relational_noun\"],\n",
+    "    \"rel-cl\": [\"distractor_agreement_relative_clause\"],\n",
+    "    \"re-irr-sv-agr\": [\n",
+    "        \"irregular_plural_subject_verb_agreement_1\",\n",
+    "        \"irregular_plural_subject_verb_agreement_2\",\n",
+    "        \"regular_plural_subject_verb_agreement_1\",\n",
+    "        \"regular_plural_subject_verb_agreement_2\",\n",
+    "    ],\n",
+    "    # NPI licensing\n",
+    "    \"npi-only\": [\"only_npi_licensor_present\", \"only_npi_scope\"],\n",
+    "    \"npi-sent-neg\": [\n",
+    "        \"sentential_negation_npi_licensor_present\",\n",
+    "        \"sentential_negation_npi_scope\",\n",
+    "    ],\n",
+    "    \"npi-sim-ques\": [\"matrix_question_npi_licensor_present\"],\n",
+    "    # quantifiers\n",
+    "    \"superlative-quantifier\": [\n",
+    "        \"superlative_quantifiers_1\",\n",
+    "        \"superlative_quantifiers_2\",\n",
+    "    ],\n",
+    "    \"existential-there-quantifier\": [\"existential_there_quantifiers_1\"],\n",
+    "    # binding (principle A)\n",
+    "    \"binding-c-command\": [\"principle_A_c_command\"],\n",
+    "    \"binding-case\": [\"principle_A_case_1\", \"principle_A_case_2\"],\n",
+    "    \"binding-domain\": [\n",
+    "        \"principle_A_domain_1\",\n",
+    "        \"principle_A_domain_2\",\n",
+    "        \"principle_A_domain_3\",\n",
+    "    ],\n",
+    "    \"binding-reconstruction\": [\"principle_A_reconstruction\"],\n",
+    "    # passives\n",
+    "    \"passive\": [\"passive_1\", \"passive_2\"],\n",
+    "    # determiner-noun agreement\n",
+    "    \"det-adj-noun\": [\n",
+    "        \"determiner_noun_agreement_with_adjective_1\",\n",
+    "        \"determiner_noun_agreement_with_adj_2\",\n",
+    "        \"determiner_noun_agreement_with_adj_irregular_1\",\n",
+    "        \"determiner_noun_agreement_with_adj_irregular_2\",\n",
+    "    ],\n",
+    "    \"det-noun\": [\n",
+    "        \"determiner_noun_agreement_1\",\n",
+    "        \"determiner_noun_agreement_2\",\n",
+    "        \"determiner_noun_agreement_irregular_1\",\n",
+    "        \"determiner_noun_agreement_irregular_2\",\n",
+    "    ],\n",
+    "}\n",
+    "\n",
+    "# every filter name, including \"full\"\n",
+    "all_filters = set(blimp_corpus_map)\n",
+    "# flatten the per-filter lists into a single set of benchmark names\n",
+    "benchmarks_with_filters = {\n",
+    "    benchmark\n",
+    "    for benchmarks in blimp_corpus_map.values()\n",
+    "    for benchmark in benchmarks\n",
+    "}\n",
+    "print(all_filters)\n",
+    "print(benchmarks_with_filters)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Read the main data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Index(['corpus', 'arch', 'seed', 'validation_loss', 'validation_ppl',\n",
+      "       'test_loss', 'test_ppl', 'blimp_benchmark', 'blimp_acc',\n",
+      "       'filter_target', 'z_validation_ppl', 'z_validation_loss', 'z_test_ppl',\n",
+      "       'z_test_loss', 'full_same_seed_acc', 'blimp_delta_same_seed',\n",
+      "       'full_all_seed_avg_acc', 'blimp_delta', 'field', 'linguistics_term',\n",
+      "       'corpus_tokens'],\n",
+      "      dtype='object')\n",
+      "Unexpected exception formatting exception. Falling back to standard exception\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/formatters.py\", line 223, in catch_format_error\n",
+      "    r = method(self, *args, **kwargs)\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/formatters.py\", line 344, in __call__\n",
+      "    return method()\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/core/frame.py\", line 1106, in _repr_html_\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/io/formats/format.py\", line 1110, in to_html\n",
+      "    When formatting an Index subclass\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/io/formats/html.py\", line 88, in to_string\n",
+      "    lines = self.render()\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/io/formats/html.py\", line 644, in render\n",
+      "    super().render()\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/io/formats/html.py\", line 94, in render\n",
+      "    self._write_table()\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/io/formats/html.py\", line 267, in _write_table\n",
+      "    self._write_header(indent + self.indent_delta)\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/io/formats/html.py\", line 403, in _write_header\n",
+      "    self._write_col_header(indent + self.indent_delta)\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/io/formats/html.py\", line 383, in _write_col_header\n",
+      "    row.extend(self._get_columns_formatted_values())\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/pandas/io/formats/html.py\", line 611, in _get_columns_formatted_values\n",
+      "    return self.columns._format_flat(include_name=False)\n",
+      "AttributeError: 'Index' object has no attribute '_format_flat'\n",
+      "\n",
+      "During handling of the above exception, another exception occurred:\n",
+      "\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/interactiveshell.py\", line 2102, in showtraceback\n",
+      "    stb = self.InteractiveTB.structured_traceback(\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/ultratb.py\", line 1310, in structured_traceback\n",
+      "    return FormattedTB.structured_traceback(\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/ultratb.py\", line 1199, in structured_traceback\n",
+      "    return VerboseTB.structured_traceback(\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/ultratb.py\", line 1052, in structured_traceback\n",
+      "    formatted_exception = self.format_exception_as_a_whole(etype, evalue, etb, number_of_lines_of_context,\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/ultratb.py\", line 978, in format_exception_as_a_whole\n",
+      "    frames.append(self.format_record(record))\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/ultratb.py\", line 878, in format_record\n",
+      "    frame_info.lines, Colors, self.has_colors, lvals\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/IPython/core/ultratb.py\", line 712, in lines\n",
+      "    return self._sd.lines\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/stack_data/utils.py\", line 144, in cached_property_wrapper\n",
+      "    value = obj.__dict__[self.func.__name__] = self.func(obj)\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/stack_data/core.py\", line 734, in lines\n",
+      "    pieces = self.included_pieces\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/stack_data/utils.py\", line 144, in cached_property_wrapper\n",
+      "    value = obj.__dict__[self.func.__name__] = self.func(obj)\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/stack_data/core.py\", line 681, in included_pieces\n",
+      "    pos = scope_pieces.index(self.executing_piece)\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/stack_data/utils.py\", line 144, in cached_property_wrapper\n",
+      "    value = obj.__dict__[self.func.__name__] = self.func(obj)\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/stack_data/core.py\", line 660, in executing_piece\n",
+      "    return only(\n",
+      "  File \"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/executing/executing.py\", line 116, in only\n",
+      "    raise NotOneValueFound('Expected one value, found 0')\n",
+      "executing.executing.NotOneValueFound: Expected one value, found 0\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "        corpus         arch  seed  validation_loss  validation_ppl  test_loss  \\\n",
+       "0         full  transformer     0         3.841752       46.607061   3.845174   \n",
+       "1         full  transformer     1         3.840523       46.549825   3.844156   \n",
+       "2         full  transformer     2         3.840050       46.527789   3.843551   \n",
+       "3         full  transformer     3         3.841511       46.595850   3.844735   \n",
+       "4         full  transformer     4         3.842284       46.631870   3.846043   \n",
+       "...        ...          ...   ...              ...             ...        ...   \n",
+       "10715  passive         lstm     0         3.975108       53.255876   3.977766   \n",
+       "10716  passive         lstm     1         3.975204       53.261006   3.978100   \n",
+       "10717  passive         lstm     2         3.974197       53.207382   3.977306   \n",
+       "10718  passive         lstm     3         3.975786       53.291974   3.978186   \n",
+       "10719  passive         lstm     4         3.977474       53.382045   3.980730   \n",
+       "\n",
+       "        test_ppl                    blimp_benchmark  blimp_acc  filter_target  \\\n",
+       "0      46.766802                     adjunct_island      0.726          False   \n",
+       "1      46.719216                     adjunct_island      0.654          False   \n",
+       "2      46.690965                     adjunct_island      0.668          False   \n",
+       "3      46.746313                     adjunct_island      0.814          False   \n",
+       "4      46.807473                     adjunct_island      0.665          False   \n",
+       "...          ...                                ...        ...            ...   \n",
+       "10715  53.397600  wh_vs_that_with_gap_long_distance      0.137          False   \n",
+       "10716  53.415426  wh_vs_that_with_gap_long_distance      0.164          False   \n",
+       "10717  53.373047  wh_vs_that_with_gap_long_distance      0.121          False   \n",
+       "10718  53.420024  wh_vs_that_with_gap_long_distance      0.091          False   \n",
+       "10719  53.556119  wh_vs_that_with_gap_long_distance      0.125          False   \n",
+       "\n",
+       "       ...  z_validation_loss  z_test_ppl  z_test_loss  full_same_seed_acc  \\\n",
+       "0      ...          -1.096013   -1.089433    -1.096798               0.726   \n",
+       "1      ...          -1.114756   -1.103922    -1.112395               0.654   \n",
+       "2      ...          -1.121978   -1.112524    -1.121662               0.668   \n",
+       "3      ...          -1.099682   -1.095671    -1.103512               0.814   \n",
+       "4      ...          -1.087896   -1.077049    -1.083481               0.665   \n",
+       "...    ...                ...         ...          ...                 ...   \n",
+       "10715  ...           0.938086    0.929552     0.934563               0.144   \n",
+       "10716  ...           0.939555    0.934980     0.939677               0.139   \n",
+       "10717  ...           0.924191    0.922076     0.927517               0.161   \n",
+       "10718  ...           0.948421    0.936380     0.940995               0.156   \n",
+       "10719  ...           0.974180    0.977819     0.979977               0.087   \n",
+       "\n",
+       "       blimp_delta_same_seed  full_all_seed_avg_acc  blimp_delta   field  \\\n",
+       "0                      0.000                 0.7054       0.0206  syntax   \n",
+       "1                      0.000                 0.7054      -0.0514  syntax   \n",
+       "2                      0.000                 0.7054      -0.0374  syntax   \n",
+       "3                      0.000                 0.7054       0.1086  syntax   \n",
+       "4                      0.000                 0.7054      -0.0404  syntax   \n",
+       "...                      ...                    ...          ...     ...   \n",
+       "10715                 -0.007                 0.1374      -0.0004  syntax   \n",
+       "10716                  0.025                 0.1374       0.0266  syntax   \n",
+       "10717                 -0.040                 0.1374      -0.0164  syntax   \n",
+       "10718                 -0.065                 0.1374      -0.0464  syntax   \n",
+       "10719                  0.038                 0.1374      -0.0124  syntax   \n",
+       "\n",
+       "            linguistics_term corpus_tokens  \n",
+       "0             island_effects      66442068  \n",
+       "1             island_effects      66442068  \n",
+       "2             island_effects      66442068  \n",
+       "3             island_effects      66442068  \n",
+       "4             island_effects      66442068  \n",
+       "...                      ...           ...  \n",
+       "10715  filler_gap_dependency      66155000  \n",
+       "10716  filler_gap_dependency      66155000  \n",
+       "10717  filler_gap_dependency      66155000  \n",
+       "10718  filler_gap_dependency      66155000  \n",
+       "10719  filler_gap_dependency      66155000  \n",
+       "\n",
+       "[10720 rows x 21 columns]"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load the tidy results table (first CSV column becomes the index) and expose\n",
+    "# the all-seed-average delta column under the shorter name `blimp_delta`.\n",
+    "main_data = pd.read_csv(\"data/tidy_results.csv\", index_col=0).rename(\n",
+    "    columns={\"blimp_delta_all_seed_avg\": \"blimp_delta\"}\n",
+    ")\n",
+    "print(main_data.columns)\n",
+    "main_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "arch\n",
+      "lstm           53.403396\n",
+      "transformer    46.970552\n",
+      "Name: validation_ppl, dtype: float64\n",
+      "TtestResult(statistic=271.4049864115043, pvalue=4.192319978874203e-29, df=15)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/var/folders/7b/186zvw415gldw_ktxf30pyzc0000gn/T/ipykernel_33594/3779774374.py:9: FutureWarning: Using the level keyword in DataFrame and Series aggregations is deprecated and will be removed in a future version. Use groupby instead. df.median(level=1) should use df.groupby(level=1).median().\n",
+      "  print(ppls_by_arch.mean(level=\"arch\"))\n"
+     ]
+    }
+   ],
+   "source": [
+    "# mean validation perplexity for every (corpus, arch) pair, pooling all rows\n",
+    "# (i.e. all seeds) that share that pair\n",
+    "ppls_by_arch = main_data.groupby([\"corpus\", \"arch\"])[\"validation_ppl\"].mean()\n",
+    "# pivot `arch` into columns so each corpus row holds its lstm and transformer\n",
+    "# means side by side (this is the pairing used by the t-test below)\n",
+    "ppl_unstacked = ppls_by_arch.unstack(level=\"arch\")\n",
+    "\n",
+    "# mean ppl for each architecture, averaged over corpora.\n",
+    "# groupby(level=...) replaces Series.mean(level=...), which is deprecated and\n",
+    "# no longer accepted by pandas >= 2.0.\n",
+    "print(ppls_by_arch.groupby(level=\"arch\").mean())\n",
+    "# paired t-test across corpora: lstm vs. transformer mean validation ppl\n",
+    "print(scipy.stats.ttest_rel(ppl_unstacked[\"lstm\"], ppl_unstacked[\"transformer\"]))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "corpus-filtering",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}