From e425c905284d223c67874bc7ae1a844bd9c52ef3 Mon Sep 17 00:00:00 2001 From: Shane Steinert-Threlkeld Date: Tue, 7 May 2024 13:39:25 -0700 Subject: [PATCH] add regression to notebook --- results/analysis.ipynb | 169 +++++++++++++++++++---------------------- 1 file changed, 79 insertions(+), 90 deletions(-) diff --git a/results/analysis.ipynb b/results/analysis.ipynb index 99a1050..487f6b4 100644 --- a/results/analysis.ipynb +++ b/results/analysis.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -14,15 +14,15 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'binding-reconstruction', 'binding-domain', 'rel-cl', 'npi-sim-ques', 'binding-c-command', 'binding-case', 'passive', 'existential-there-quantifier', 'det-noun', 'npi-only', 'pp-mod-subj', 'full', 're-irr-sv-agr', 'det-adj-noun', 'npi-sent-neg', 'superlative-quantifier'}\n", - "{'sentential_negation_npi_scope', 'existential_there_quantifiers_1', 'matrix_question_npi_licensor_present', 'superlative_quantifiers_1', 'principle_A_case_1', 'principle_A_domain_1', 'principle_A_domain_3', 'principle_A_case_2', 'determiner_noun_agreement_with_adj_2', 'distractor_agreement_relative_clause', 'irregular_plural_subject_verb_agreement_2', 'determiner_noun_agreement_with_adj_irregular_1', 'determiner_noun_agreement_with_adjective_1', 'irregular_plural_subject_verb_agreement_1', 'regular_plural_subject_verb_agreement_2', 'sentential_negation_npi_licensor_present', 'principle_A_c_command', 'principle_A_reconstruction', 'superlative_quantifiers_2', 'determiner_noun_agreement_irregular_1', 'principle_A_domain_2', 'determiner_noun_agreement_1', 'only_npi_scope', 'distractor_agreement_relational_noun', 'passive_2', 'passive_1', 'determiner_noun_agreement_2', 'regular_plural_subject_verb_agreement_1', 
'determiner_noun_agreement_with_adj_irregular_2', 'determiner_noun_agreement_irregular_2', 'only_npi_licensor_present'}\n" + "{'npi-sent-neg', 'npi-only', 'npi-sim-ques', 'existential-there-quantifier', 'pp-mod-subj', 'binding-c-command', 'binding-case', 'rel-cl', 'binding-reconstruction', 'det-noun', 'full', 'det-adj-noun', 'passive', 'binding-domain', 're-irr-sv-agr', 'superlative-quantifier'}\n", + "{'superlative_quantifiers_2', 'distractor_agreement_relative_clause', 'principle_A_c_command', 'distractor_agreement_relational_noun', 'regular_plural_subject_verb_agreement_1', 'irregular_plural_subject_verb_agreement_1', 'determiner_noun_agreement_with_adj_irregular_1', 'principle_A_case_2', 'sentential_negation_npi_scope', 'principle_A_domain_3', 'principle_A_case_1', 'determiner_noun_agreement_irregular_2', 'determiner_noun_agreement_with_adj_irregular_2', 'regular_plural_subject_verb_agreement_2', 'determiner_noun_agreement_with_adjective_1', 'matrix_question_npi_licensor_present', 'determiner_noun_agreement_1', 'only_npi_licensor_present', 'determiner_noun_agreement_irregular_1', 'determiner_noun_agreement_2', 'passive_2', 'existential_there_quantifiers_1', 'superlative_quantifiers_1', 'determiner_noun_agreement_with_adj_2', 'sentential_negation_npi_licensor_present', 'principle_A_domain_2', 'passive_1', 'principle_A_domain_1', 'irregular_plural_subject_verb_agreement_2', 'principle_A_reconstruction', 'only_npi_scope'}\n" ] } ], @@ -85,7 +85,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -266,7 +266,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -276,7 +276,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -296,7 +296,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -324,6 +324,32 @@ 
"print(cohen_d(ppl_unstacked[\"lstm\"], ppl_unstacked[\"transformer\"]))\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pearson correlation between test perplexity (`test_ppl`) and corpus size in tokens (`corpus_tokens`), computed separately for each architecture:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PearsonRResult(statistic=-0.9694350791008717, pvalue=0.0)\n", + "PearsonRResult(statistic=-0.9755968743080783, pvalue=0.0)\n" + ] + } + ], + "source": [ + "print(scipy.stats.pearsonr(main_data[main_data[\"arch\"]==\"lstm\"][\"corpus_tokens\"], main_data[main_data[\"arch\"]==\"lstm\"][\"test_ppl\"]))\n", + "print(scipy.stats.pearsonr(main_data[main_data[\"arch\"]==\"transformer\"][\"corpus_tokens\"], main_data[main_data[\"arch\"]==\"transformer\"][\"test_ppl\"]))" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -340,7 +366,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -373,7 +399,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -401,104 +427,67 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Next steps" + "## Regression Analysis\n", + "\n", + "Here's a first attempt at a regression to work out which factors are and are not responsible for the accuracy deltas. Let me know what you think and/or what I'm forgetting!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Rough summary: architecture _on its own_ is not significant! Neither is _test perplexity_! The only consistently significant predictors are _filter-target_ and all of the interactions with it. Does this make sense? One caveat: the factors are treatment-coded and the model includes interactions, so these main-effect coefficients describe the reference level only (architecture at filter-target = False and test perplexity = 0; test perplexity for the LSTM at filter-target = False), not overall effects. I'm also not sure this is the best specification of the predictors for the model. I did random intercepts for each combination of corpus and benchmark; I found similar results when only doing random intercepts for corpus."
] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " Mixed Linear Model Regression Results\n", - "=============================================================\n", - "Model: MixedLM Dependent Variable: test_ppl \n", - "No. Observations: 10720 Method: REML \n", - "No. Groups: 2 Scale: 0.0268 \n", - "Min. group size: 5360 Log-Likelihood: 4158.2101\n", - "Max. group size: 5360 Converged: Yes \n", - "Mean group size: 5360.0 \n", - "-------------------------------------------------------------\n", - " Coef. Std.Err. z P>|z| [0.025 0.975]\n", - "-------------------------------------------------------------\n", - "Intercept 110.442 2.130 51.849 0.000 106.267 114.617\n", - "corpus_tokens -0.000 0.000 -419.795 0.000 -0.000 -0.000\n", - "Group Var 9.034 41.311 \n", - "=============================================================\n", - "\n", - " OLS Regression Results \n", - "==============================================================================\n", - "Dep. Variable: test_ppl R-squared: 0.998\n", - "Model: OLS Adj. R-squared: 0.998\n", - "Method: Least Squares F-statistic: 1.499e+06\n", - "Date: Tue, 23 Apr 2024 Prob (F-statistic): 0.00\n", - "Time: 11:35:29 Log-Likelihood: 4427.0\n", - "No. 
Observations: 10720 AIC: -8846.\n", - "Df Residuals: 10716 BIC: -8817.\n", - "Df Model: 3 \n", - "Covariance Type: nonrobust \n", - "=====================================================================================================\n", - " coef std err t P>|t| [0.025 0.975]\n", - "-----------------------------------------------------------------------------------------------------\n", - "Intercept 116.7298 0.198 589.364 0.000 116.342 117.118\n", - "arch[T.transformer] -12.5748 0.280 -44.894 0.000 -13.124 -12.026\n", - "corpus_tokens -9.572e-07 3e-09 -318.976 0.000 -9.63e-07 -9.51e-07\n", - "corpus_tokens:arch[T.transformer] 9.319e-08 4.24e-09 21.959 0.000 8.49e-08 1.02e-07\n", - "==============================================================================\n", - "Omnibus: 2121.682 Durbin-Watson: 0.697\n", - "Prob(Omnibus): 0.000 Jarque-Bera (JB): 5421.708\n", - "Skew: -1.083 Prob(JB): 0.00\n", - "Kurtosis: 5.729 Cond. No. 1.56e+10\n", - "==============================================================================\n", - "\n", - "Notes:\n", - "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", - "[2] The condition number is large, 1.56e+10. This might indicate that there are\n", - "strong multicollinearity or other numerical problems.\n" + " Mixed Linear Model Regression Results\n", + "=============================================================================================\n", + "Model: MixedLM Dependent Variable: blimp_delta\n", + "No. Observations: 10720 Method: REML \n", + "No. Groups: 16 Scale: 0.0014 \n", + "Min. group size: 670 Log-Likelihood: 19977.0387 \n", + "Max. group size: 670 Converged: Yes \n", + "Mean group size: 670.0 \n", + "---------------------------------------------------------------------------------------------\n", + " Coef. Std.Err. 
z P>|z| [0.025 0.975]\n", + "---------------------------------------------------------------------------------------------\n", + "Intercept 0.365 0.438 0.833 0.405 -0.494 1.225\n", + "filter_target[T.True] 2.492 0.272 9.169 0.000 1.959 3.025\n", + "arch[T.transformer] 0.065 0.054 1.215 0.224 -0.040 0.171\n", + "filter_target[T.True]:arch[T.transformer] -1.987 0.377 -5.265 0.000 -2.726 -1.247\n", + "corpus_tokens -0.000 0.000 -0.721 0.471 -0.000 0.000\n", + "test_ppl -0.004 0.004 -0.958 0.338 -0.011 0.004\n", + "test_ppl:filter_target[T.True] -0.047 0.005 -9.316 0.000 -0.057 -0.037\n", + "test_ppl:arch[T.transformer] -0.002 0.001 -1.535 0.125 -0.004 0.000\n", + "test_ppl:filter_target[T.True]:arch[T.transformer] 0.036 0.008 4.733 0.000 0.021 0.050\n", + "Group Var 0.000 0.000 \n", + "=============================================================================================\n", + "\n" ] }, { - "ename": "AttributeError", - "evalue": "'MixedLMResults' object has no attribute 'ssr'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 9\u001b[0m\n\u001b[1;32m 6\u001b[0m fixed_test_ppl_result \u001b[38;5;241m=\u001b[39m fixed_test_ppl_model\u001b[38;5;241m.\u001b[39mfit()\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(fixed_test_ppl_result\u001b[38;5;241m.\u001b[39msummary())\n\u001b[0;32m----> 9\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43msm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstats\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43manova_lm\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfixed_test_ppl_result\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmixed_test_ppl_result\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 11\u001b[0m lstm_test_ppl_model \u001b[38;5;241m=\u001b[39m 
smf\u001b[38;5;241m.\u001b[39mols(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtest_ppl ~ corpus_tokens\u001b[39m\u001b[38;5;124m\"\u001b[39m, main_data[main_data[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124march\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlstm\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 12\u001b[0m lstm_test_ppl_result \u001b[38;5;241m=\u001b[39m lstm_test_ppl_model\u001b[38;5;241m.\u001b[39mfit()\n", - "File \u001b[0;32m~/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/statsmodels/stats/anova.py:369\u001b[0m, in \u001b[0;36manova_lm\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 366\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m scale: \u001b[38;5;66;03m# assume biggest model is last\u001b[39;00m\n\u001b[1;32m 367\u001b[0m scale \u001b[38;5;241m=\u001b[39m args[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\u001b[38;5;241m.\u001b[39mscale\n\u001b[0;32m--> 369\u001b[0m table[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mssr\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m [mdl\u001b[38;5;241m.\u001b[39mssr \u001b[38;5;28;01mfor\u001b[39;00m mdl \u001b[38;5;129;01min\u001b[39;00m args]\n\u001b[1;32m 370\u001b[0m table[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdf_resid\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m [mdl\u001b[38;5;241m.\u001b[39mdf_resid \u001b[38;5;28;01mfor\u001b[39;00m mdl \u001b[38;5;129;01min\u001b[39;00m args]\n\u001b[1;32m 371\u001b[0m table\u001b[38;5;241m.\u001b[39mloc[table\u001b[38;5;241m.\u001b[39mindex[\u001b[38;5;241m1\u001b[39m:], \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdf_diff\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m-\u001b[39mnp\u001b[38;5;241m.\u001b[39mdiff(table[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdf_resid\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mvalues)\n", - 
"File \u001b[0;32m~/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/statsmodels/stats/anova.py:369\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 366\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m scale: \u001b[38;5;66;03m# assume biggest model is last\u001b[39;00m\n\u001b[1;32m 367\u001b[0m scale \u001b[38;5;241m=\u001b[39m args[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\u001b[38;5;241m.\u001b[39mscale\n\u001b[0;32m--> 369\u001b[0m table[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mssr\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m [\u001b[43mmdl\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mssr\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m mdl \u001b[38;5;129;01min\u001b[39;00m args]\n\u001b[1;32m 370\u001b[0m table[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdf_resid\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m [mdl\u001b[38;5;241m.\u001b[39mdf_resid \u001b[38;5;28;01mfor\u001b[39;00m mdl \u001b[38;5;129;01min\u001b[39;00m args]\n\u001b[1;32m 371\u001b[0m table\u001b[38;5;241m.\u001b[39mloc[table\u001b[38;5;241m.\u001b[39mindex[\u001b[38;5;241m1\u001b[39m:], \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdf_diff\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m-\u001b[39mnp\u001b[38;5;241m.\u001b[39mdiff(table[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdf_resid\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mvalues)\n", - "File \u001b[0;32m~/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/statsmodels/base/wrapper.py:34\u001b[0m, in \u001b[0;36mResultsWrapper.__getattribute__\u001b[0;34m(self, attr)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m:\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[0;32m---> 34\u001b[0m obj \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mresults\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mattr\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 35\u001b[0m data \u001b[38;5;241m=\u001b[39m results\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39mdata\n\u001b[1;32m 36\u001b[0m how \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_wrap_attrs\u001b[38;5;241m.\u001b[39mget(attr)\n", - "\u001b[0;31mAttributeError\u001b[0m: 'MixedLMResults' object has no attribute 'ssr'" + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/statsmodels/regression/mixed_linear_model.py:2238: ConvergenceWarning: The MLE may be on the boundary of the parameter space.\n", + " warnings.warn(msg, ConvergenceWarning)\n" ] } ], "source": [ - "mixed_test_ppl_model = smf.mixedlm(\"test_ppl ~ corpus_tokens\", main_data, groups=main_data[\"arch\"])\n", - "mixed_test_ppl_result = mixed_test_ppl_model.fit()\n", - "print(mixed_test_ppl_result.summary())\n", - "\n", - "fixed_test_ppl_model = smf.ols(\"test_ppl ~ corpus_tokens + arch + corpus_tokens*arch\", main_data)\n", - "fixed_test_ppl_result = fixed_test_ppl_model.fit()\n", - "print(fixed_test_ppl_result.summary())\n", - "\n", - "print(sm.stats.anova_lm(fixed_test_ppl_result, mixed_test_ppl_result))\n", - "\n", - "lstm_test_ppl_model = smf.ols(\"test_ppl ~ corpus_tokens\", main_data[main_data[\"arch\"] == \"lstm\"])\n", - "lstm_test_ppl_result = lstm_test_ppl_model.fit()\n", - "print(lstm_test_ppl_result.summary())\n", - "\n", - "print(scipy.stats.pearsonr(main_data[main_data[\"arch\"]==\"lstm\"][\"corpus_tokens\"], main_data[main_data[\"arch\"]==\"lstm\"][\"test_ppl\"]))" + "main_data[\"corpus-and-benchmark\"] = main_data[\"corpus\"] + \"-\" + main_data[\"blimp_benchmark\"]\n", + "full_regression = smf.mixedlm(\"blimp_delta ~ corpus_tokens + test_ppl*filter_target*arch\", 
main_data, groups=main_data[\"corpus-and-benchmark\"])\n", + "full_regression_result = full_regression.fit()\n", + "print(full_regression_result.summary())\n" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": {