Skip to content

Commit

Permalink
add regression to notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
shanest committed May 7, 2024
1 parent 4c392b6 commit e425c90
Showing 1 changed file with 79 additions and 90 deletions.
169 changes: 79 additions & 90 deletions results/analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -14,15 +14,15 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'binding-reconstruction', 'binding-domain', 'rel-cl', 'npi-sim-ques', 'binding-c-command', 'binding-case', 'passive', 'existential-there-quantifier', 'det-noun', 'npi-only', 'pp-mod-subj', 'full', 're-irr-sv-agr', 'det-adj-noun', 'npi-sent-neg', 'superlative-quantifier'}\n",
"{'sentential_negation_npi_scope', 'existential_there_quantifiers_1', 'matrix_question_npi_licensor_present', 'superlative_quantifiers_1', 'principle_A_case_1', 'principle_A_domain_1', 'principle_A_domain_3', 'principle_A_case_2', 'determiner_noun_agreement_with_adj_2', 'distractor_agreement_relative_clause', 'irregular_plural_subject_verb_agreement_2', 'determiner_noun_agreement_with_adj_irregular_1', 'determiner_noun_agreement_with_adjective_1', 'irregular_plural_subject_verb_agreement_1', 'regular_plural_subject_verb_agreement_2', 'sentential_negation_npi_licensor_present', 'principle_A_c_command', 'principle_A_reconstruction', 'superlative_quantifiers_2', 'determiner_noun_agreement_irregular_1', 'principle_A_domain_2', 'determiner_noun_agreement_1', 'only_npi_scope', 'distractor_agreement_relational_noun', 'passive_2', 'passive_1', 'determiner_noun_agreement_2', 'regular_plural_subject_verb_agreement_1', 'determiner_noun_agreement_with_adj_irregular_2', 'determiner_noun_agreement_irregular_2', 'only_npi_licensor_present'}\n"
"{'npi-sent-neg', 'npi-only', 'npi-sim-ques', 'existential-there-quantifier', 'pp-mod-subj', 'binding-c-command', 'binding-case', 'rel-cl', 'binding-reconstruction', 'det-noun', 'full', 'det-adj-noun', 'passive', 'binding-domain', 're-irr-sv-agr', 'superlative-quantifier'}\n",
"{'superlative_quantifiers_2', 'distractor_agreement_relative_clause', 'principle_A_c_command', 'distractor_agreement_relational_noun', 'regular_plural_subject_verb_agreement_1', 'irregular_plural_subject_verb_agreement_1', 'determiner_noun_agreement_with_adj_irregular_1', 'principle_A_case_2', 'sentential_negation_npi_scope', 'principle_A_domain_3', 'principle_A_case_1', 'determiner_noun_agreement_irregular_2', 'determiner_noun_agreement_with_adj_irregular_2', 'regular_plural_subject_verb_agreement_2', 'determiner_noun_agreement_with_adjective_1', 'matrix_question_npi_licensor_present', 'determiner_noun_agreement_1', 'only_npi_licensor_present', 'determiner_noun_agreement_irregular_1', 'determiner_noun_agreement_2', 'passive_2', 'existential_there_quantifiers_1', 'superlative_quantifiers_1', 'determiner_noun_agreement_with_adj_2', 'sentential_negation_npi_licensor_present', 'principle_A_domain_2', 'passive_1', 'principle_A_domain_1', 'irregular_plural_subject_verb_agreement_2', 'principle_A_reconstruction', 'only_npi_scope'}\n"
]
}
],
Expand Down Expand Up @@ -85,7 +85,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -266,7 +266,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -276,7 +276,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -296,7 +296,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -324,6 +324,32 @@
"print(cohen_d(ppl_unstacked[\"lstm\"], ppl_unstacked[\"transformer\"]))\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Pearson correlation between test perplexity (`test_ppl`) and corpus size in tokens (`corpus_tokens`), computed separately for each architecture:"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"PearsonRResult(statistic=-0.9694350791008717, pvalue=0.0)\n",
"PearsonRResult(statistic=-0.9755968743080783, pvalue=0.0)\n"
]
}
],
"source": [
"# Pearson r of corpus size vs. test perplexity, printed once per architecture (lstm first).\n",
"for arch_name in (\"lstm\", \"transformer\"):\n",
"    arch_rows = main_data[main_data[\"arch\"] == arch_name]\n",
"    print(scipy.stats.pearsonr(arch_rows[\"corpus_tokens\"], arch_rows[\"test_ppl\"]))"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -340,7 +366,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -373,7 +399,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -401,104 +427,67 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Next steps"
"## Regression Analysis\n",
"\n",
"Here's an attempt at a regression that attempts to decipher what factors are and are not responsible for the accuracy deltas. Let me know what you think and/or what I'm forgetting!"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Rough summary: architecture _on its own_ is not significant! Neither is _test perplexity_! The only consistently significant factor is _filter-target_, along with all of its interactions. Does this make sense? I'm also not sure this is the best specification of the predictors for the model. I used random intercepts for each combination of corpus and benchmark; I found similar results when using random intercepts for corpus only."
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Mixed Linear Model Regression Results\n",
"=============================================================\n",
"Model: MixedLM Dependent Variable: test_ppl \n",
"No. Observations: 10720 Method: REML \n",
"No. Groups: 2 Scale: 0.0268 \n",
"Min. group size: 5360 Log-Likelihood: 4158.2101\n",
"Max. group size: 5360 Converged: Yes \n",
"Mean group size: 5360.0 \n",
"-------------------------------------------------------------\n",
" Coef. Std.Err. z P>|z| [0.025 0.975]\n",
"-------------------------------------------------------------\n",
"Intercept 110.442 2.130 51.849 0.000 106.267 114.617\n",
"corpus_tokens -0.000 0.000 -419.795 0.000 -0.000 -0.000\n",
"Group Var 9.034 41.311 \n",
"=============================================================\n",
"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: test_ppl R-squared: 0.998\n",
"Model: OLS Adj. R-squared: 0.998\n",
"Method: Least Squares F-statistic: 1.499e+06\n",
"Date: Tue, 23 Apr 2024 Prob (F-statistic): 0.00\n",
"Time: 11:35:29 Log-Likelihood: 4427.0\n",
"No. Observations: 10720 AIC: -8846.\n",
"Df Residuals: 10716 BIC: -8817.\n",
"Df Model: 3 \n",
"Covariance Type: nonrobust \n",
"=====================================================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"-----------------------------------------------------------------------------------------------------\n",
"Intercept 116.7298 0.198 589.364 0.000 116.342 117.118\n",
"arch[T.transformer] -12.5748 0.280 -44.894 0.000 -13.124 -12.026\n",
"corpus_tokens -9.572e-07 3e-09 -318.976 0.000 -9.63e-07 -9.51e-07\n",
"corpus_tokens:arch[T.transformer] 9.319e-08 4.24e-09 21.959 0.000 8.49e-08 1.02e-07\n",
"==============================================================================\n",
"Omnibus: 2121.682 Durbin-Watson: 0.697\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 5421.708\n",
"Skew: -1.083 Prob(JB): 0.00\n",
"Kurtosis: 5.729 Cond. No. 1.56e+10\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"[2] The condition number is large, 1.56e+10. This might indicate that there are\n",
"strong multicollinearity or other numerical problems.\n"
" Mixed Linear Model Regression Results\n",
"=============================================================================================\n",
"Model: MixedLM Dependent Variable: blimp_delta\n",
"No. Observations: 10720 Method: REML \n",
"No. Groups: 16 Scale: 0.0014 \n",
"Min. group size: 670 Log-Likelihood: 19977.0387 \n",
"Max. group size: 670 Converged: Yes \n",
"Mean group size: 670.0 \n",
"---------------------------------------------------------------------------------------------\n",
" Coef. Std.Err. z P>|z| [0.025 0.975]\n",
"---------------------------------------------------------------------------------------------\n",
"Intercept 0.365 0.438 0.833 0.405 -0.494 1.225\n",
"filter_target[T.True] 2.492 0.272 9.169 0.000 1.959 3.025\n",
"arch[T.transformer] 0.065 0.054 1.215 0.224 -0.040 0.171\n",
"filter_target[T.True]:arch[T.transformer] -1.987 0.377 -5.265 0.000 -2.726 -1.247\n",
"corpus_tokens -0.000 0.000 -0.721 0.471 -0.000 0.000\n",
"test_ppl -0.004 0.004 -0.958 0.338 -0.011 0.004\n",
"test_ppl:filter_target[T.True] -0.047 0.005 -9.316 0.000 -0.057 -0.037\n",
"test_ppl:arch[T.transformer] -0.002 0.001 -1.535 0.125 -0.004 0.000\n",
"test_ppl:filter_target[T.True]:arch[T.transformer] 0.036 0.008 4.733 0.000 0.021 0.050\n",
"Group Var 0.000 0.000 \n",
"=============================================================================================\n",
"\n"
]
},
{
"ename": "AttributeError",
"evalue": "'MixedLMResults' object has no attribute 'ssr'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[9], line 9\u001b[0m\n\u001b[1;32m 6\u001b[0m fixed_test_ppl_result \u001b[38;5;241m=\u001b[39m fixed_test_ppl_model\u001b[38;5;241m.\u001b[39mfit()\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(fixed_test_ppl_result\u001b[38;5;241m.\u001b[39msummary())\n\u001b[0;32m----> 9\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43msm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstats\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43manova_lm\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfixed_test_ppl_result\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmixed_test_ppl_result\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 11\u001b[0m lstm_test_ppl_model \u001b[38;5;241m=\u001b[39m smf\u001b[38;5;241m.\u001b[39mols(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtest_ppl ~ corpus_tokens\u001b[39m\u001b[38;5;124m\"\u001b[39m, main_data[main_data[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124march\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlstm\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 12\u001b[0m lstm_test_ppl_result \u001b[38;5;241m=\u001b[39m lstm_test_ppl_model\u001b[38;5;241m.\u001b[39mfit()\n",
"File \u001b[0;32m~/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/statsmodels/stats/anova.py:369\u001b[0m, in \u001b[0;36manova_lm\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 366\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m scale: \u001b[38;5;66;03m# assume biggest model is last\u001b[39;00m\n\u001b[1;32m 367\u001b[0m scale \u001b[38;5;241m=\u001b[39m args[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\u001b[38;5;241m.\u001b[39mscale\n\u001b[0;32m--> 369\u001b[0m table[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mssr\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m [mdl\u001b[38;5;241m.\u001b[39mssr \u001b[38;5;28;01mfor\u001b[39;00m mdl \u001b[38;5;129;01min\u001b[39;00m args]\n\u001b[1;32m 370\u001b[0m table[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdf_resid\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m [mdl\u001b[38;5;241m.\u001b[39mdf_resid \u001b[38;5;28;01mfor\u001b[39;00m mdl \u001b[38;5;129;01min\u001b[39;00m args]\n\u001b[1;32m 371\u001b[0m table\u001b[38;5;241m.\u001b[39mloc[table\u001b[38;5;241m.\u001b[39mindex[\u001b[38;5;241m1\u001b[39m:], \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdf_diff\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m-\u001b[39mnp\u001b[38;5;241m.\u001b[39mdiff(table[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdf_resid\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mvalues)\n",
"File \u001b[0;32m~/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/statsmodels/stats/anova.py:369\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 366\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m scale: \u001b[38;5;66;03m# assume biggest model is last\u001b[39;00m\n\u001b[1;32m 367\u001b[0m scale \u001b[38;5;241m=\u001b[39m args[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\u001b[38;5;241m.\u001b[39mscale\n\u001b[0;32m--> 369\u001b[0m table[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mssr\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m [\u001b[43mmdl\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mssr\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m mdl \u001b[38;5;129;01min\u001b[39;00m args]\n\u001b[1;32m 370\u001b[0m table[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdf_resid\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m [mdl\u001b[38;5;241m.\u001b[39mdf_resid \u001b[38;5;28;01mfor\u001b[39;00m mdl \u001b[38;5;129;01min\u001b[39;00m args]\n\u001b[1;32m 371\u001b[0m table\u001b[38;5;241m.\u001b[39mloc[table\u001b[38;5;241m.\u001b[39mindex[\u001b[38;5;241m1\u001b[39m:], \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdf_diff\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m-\u001b[39mnp\u001b[38;5;241m.\u001b[39mdiff(table[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdf_resid\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mvalues)\n",
"File \u001b[0;32m~/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/statsmodels/base/wrapper.py:34\u001b[0m, in \u001b[0;36mResultsWrapper.__getattribute__\u001b[0;34m(self, attr)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m:\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[0;32m---> 34\u001b[0m obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mresults\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mattr\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 35\u001b[0m data \u001b[38;5;241m=\u001b[39m results\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39mdata\n\u001b[1;32m 36\u001b[0m how \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_wrap_attrs\u001b[38;5;241m.\u001b[39mget(attr)\n",
"\u001b[0;31mAttributeError\u001b[0m: 'MixedLMResults' object has no attribute 'ssr'"
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/shanest/opt/anaconda3/envs/corpus-filtering/lib/python3.9/site-packages/statsmodels/regression/mixed_linear_model.py:2238: ConvergenceWarning: The MLE may be on the boundary of the parameter space.\n",
" warnings.warn(msg, ConvergenceWarning)\n"
]
}
],
"source": [
"mixed_test_ppl_model = smf.mixedlm(\"test_ppl ~ corpus_tokens\", main_data, groups=main_data[\"arch\"])\n",
"mixed_test_ppl_result = mixed_test_ppl_model.fit()\n",
"print(mixed_test_ppl_result.summary())\n",
"\n",
"fixed_test_ppl_model = smf.ols(\"test_ppl ~ corpus_tokens + arch + corpus_tokens*arch\", main_data)\n",
"fixed_test_ppl_result = fixed_test_ppl_model.fit()\n",
"print(fixed_test_ppl_result.summary())\n",
"\n",
"print(sm.stats.anova_lm(fixed_test_ppl_result, mixed_test_ppl_result))\n",
"\n",
"lstm_test_ppl_model = smf.ols(\"test_ppl ~ corpus_tokens\", main_data[main_data[\"arch\"] == \"lstm\"])\n",
"lstm_test_ppl_result = lstm_test_ppl_model.fit()\n",
"print(lstm_test_ppl_result.summary())\n",
"\n",
"print(scipy.stats.pearsonr(main_data[main_data[\"arch\"]==\"lstm\"][\"corpus_tokens\"], main_data[main_data[\"arch\"]==\"lstm\"][\"test_ppl\"]))"
"main_data[\"corpus-and-benchmark\"] = main_data[\"corpus\"] + \"-\" + main_data[\"blimp_benchmark\"]\n",
"full_regression = smf.mixedlm(\"blimp_delta ~ corpus_tokens + test_ppl*filter_target*arch\", main_data, groups=main_data[\"corpus-and-benchmark\"])\n",
"full_regression_result = full_regression.fit()\n",
"print(full_regression_result.summary())\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down

0 comments on commit e425c90

Please sign in to comment.