diff --git a/runs.ipynb b/runs.ipynb
index bc15760aa..fbff86287 100644
--- a/runs.ipynb
+++ b/runs.ipynb
@@ -81,9 +81,22 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "ename": "ModuleNotFoundError",
+ "evalue": "No module named 'lightgbm'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[1], line 20\u001b[0m\n\u001b[1;32m 17\u001b[0m warnings\u001b[38;5;241m.\u001b[39msimplefilter(action\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m'\u001b[39m, category\u001b[38;5;241m=\u001b[39m\u001b[38;5;167;01mFutureWarning\u001b[39;00m)\n\u001b[1;32m 19\u001b[0m sys\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m../\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m---> 20\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mgrn_benchmark\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msrc\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhelper\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m surragate_names\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msrc\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhelper\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[1;32m 22\u001b[0m par \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 23\u001b[0m \u001b[38;5;66;03m# 'methods': [ 'collectri', 'negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor', 'genie3', 'grnboost2', 'scenic', 'scglue', 'celloracle'],\u001b[39;00m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmethods\u001b[39m\u001b[38;5;124m'\u001b[39m: [ \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcollectri\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnegative_control\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpositive_control\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpearson_corr\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mportia\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mppcor\u001b[39m\u001b[38;5;124m'\u001b[39m, 
\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgrnboost2\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mscenic\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mscglue\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcelloracle\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mscenicplus\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 25\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodels_dir\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mresources/grn_models/\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 26\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mscores_dir\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mresources/scores\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 27\u001b[0m }\n",
+ "File \u001b[0;32m~/projs/ongoing/task_grn_inference/../grn_benchmark/src/helper.py:6\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mlightgbm\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mlgb\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodel_selection\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m cross_validate\n",
+ "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'lightgbm'"
+ ]
+ }
+ ],
"source": [
"%reload_ext autoreload\n",
"%autoreload 2\n",
@@ -104,7 +117,7 @@
"warnings.simplefilter(action='ignore', category=FutureWarning)\n",
"\n",
"sys.path.append('../')\n",
- "from grn_benchmark.src.commons import surragate_names\n",
+ "from grn_benchmark.src.helper import surragate_names\n",
"from src.helper import *\n",
"par = {\n",
" # 'methods': [ 'collectri', 'negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor', 'genie3', 'grnboost2', 'scenic', 'scglue', 'celloracle'],\n",
@@ -128,14 +141,7 @@
"outputs": [],
"source": [
"if False: \n",
- " create_skeleton() # create tf2gene putative links\n",
- "if False: # check how predictions are included in the skeleton \n",
- " all_links = 'path_2_skeleton'\n",
- " par['models_dir'] = 'resources/grn_models/d0_hvg'\n",
- " for method in ['scenicplus']:\n",
- " prediction = pd.read_csv(f\"{par['models_dir']}/{method}.csv\", index_col=0)\n",
- " prediction['link'] = prediction['source'].astype(str) + '_' + prediction['target'].astype(str)\n",
- " print(method, len(prediction), np.intersect1d(all_links, prediction['link']).shape)"
+ " create_skeleton() # create tf2gene putative links\n"
]
},
{
@@ -147,34 +153,42 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "sbatch: error: Unable to open file scripts/sbatch/ppcor.sh\n"
+ "scenicplus\n",
+ "Job scenicplus submitted successfully.\n",
+ "Submitted batch job 7765370\n",
+ "\n"
]
}
],
"source": [
- "# # !sacct \n",
- "# "
+ "if True: # local runs\n",
+ " run_grn_inference()\n",
+ "if False: # r based methods\n",
+ " !sbatch scripts/sbatch/ppcor.sh"
]
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 62,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "han.h5ad jackson.h5ad\tshalek.h5ad\n"
+ ]
+ }
+ ],
"source": [
- "if False: # local runs\n",
- " run_grn_inference()\n",
- "if False: # r based methods\n",
- " !sbatch scripts/sbatch/ppcor.sh\n",
- "if False: # seqera (celloracle) #TODO: add this to local \n",
- " run_grn_inference_seqera()"
+ "!ls resources/grn-benchmark/mccalla/inference"
]
},
{
@@ -186,14 +200,14 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Submitted batch job 7759699\n"
+ "Submitted batch job 7761215\n"
]
}
],
@@ -207,532 +221,491 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 2,
"metadata": {},
"outputs": [
{
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " S1 | \n",
- " S2 | \n",
- " static-theta-0.0 | \n",
- " static-theta-0.5 | \n",
- " rank | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " scenicplus | \n",
- " 0.245033 | \n",
- " 0.403494 | \n",
- " 0.760583 | \n",
- " 0.539209 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " collectri | \n",
- " -0.100238 | \n",
- " -0.211182 | \n",
- " 0.485506 | \n",
- " 0.457259 | \n",
- " 11 | \n",
- "
\n",
- " \n",
- " negative_control | \n",
- " -0.039305 | \n",
- " -0.041004 | \n",
- " 0.274659 | \n",
- " 0.440383 | \n",
- " 12 | \n",
- "
\n",
- " \n",
- " positive_control | \n",
- " 0.197129 | \n",
- " 0.578822 | \n",
- " 0.872003 | \n",
- " 0.595489 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " pearson_corr | \n",
- " 0.269379 | \n",
- " 0.509297 | \n",
- " 0.735156 | \n",
- " 0.517056 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " portia | \n",
- " 0.148941 | \n",
- " 0.227248 | \n",
- " 0.473607 | \n",
- " 0.467607 | \n",
- " 9 | \n",
- "
\n",
- " \n",
- " ppcor | \n",
- " 0.022846 | \n",
- " 0.094107 | \n",
- " 0.430776 | \n",
- " 0.449144 | \n",
- " 10 | \n",
- "
\n",
- " \n",
- " grnboost2 | \n",
- " 0.381032 | \n",
- " 0.459860 | \n",
- " 0.748175 | \n",
- " 0.615790 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " scenic | \n",
- " 0.144696 | \n",
- " 0.206571 | \n",
- " 0.685034 | \n",
- " 0.556485 | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " scglue | \n",
- " 0.078309 | \n",
- " 0.238859 | \n",
- " 0.530531 | \n",
- " 0.483423 | \n",
- " 8 | \n",
- "
\n",
- " \n",
- " celloracle | \n",
- " 0.216897 | \n",
- " 0.311451 | \n",
- " 0.711549 | \n",
- " 0.564160 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " scenicplus | \n",
- " 0.245033 | \n",
- " 0.403494 | \n",
- " 0.760583 | \n",
- " 0.539209 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " S1 S2 static-theta-0.0 static-theta-0.5 rank\n",
- "scenicplus 0.245033 0.403494 0.760583 0.539209 4\n",
- "collectri -0.100238 -0.211182 0.485506 0.457259 11\n",
- "negative_control -0.039305 -0.041004 0.274659 0.440383 12\n",
- "positive_control 0.197129 0.578822 0.872003 0.595489 2\n",
- "pearson_corr 0.269379 0.509297 0.735156 0.517056 3\n",
- "portia 0.148941 0.227248 0.473607 0.467607 9\n",
- "ppcor 0.022846 0.094107 0.430776 0.449144 10\n",
- "grnboost2 0.381032 0.459860 0.748175 0.615790 1\n",
- "scenic 0.144696 0.206571 0.685034 0.556485 7\n",
- "scglue 0.078309 0.238859 0.530531 0.483423 8\n",
- "celloracle 0.216897 0.311451 0.711549 0.564160 6\n",
- "scenicplus 0.245033 0.403494 0.760583 0.539209 4"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "10000-skeleton_False-binarize_False_lognorm-ridge.csv\n",
+ "10000-skeleton_False-binarize_False_pearson-ridge.csv\n",
+ "10000-skeleton_False-binarize_True_lognorm-ridge.csv\n",
+ "10000-skeleton_False-binarize_True_pearson-ridge.csv\n",
+ "10000-skeleton_True-binarize_False_lognorm-ridge.csv\n",
+ "10000-skeleton_True-binarize_False_pearson-ridge.csv\n",
+ "10000-skeleton_True-binarize_True_lognorm-ridge.csv\n",
+ "10000-skeleton_True-binarize_True_pearson-ridge.csv\n",
+ "50000-skeleton_False-binarize_False_lognorm-ridge.csv\n",
+ "50000-skeleton_False-binarize_False_pearson-ridge.csv\n",
+ "50000-skeleton_False-binarize_True_lognorm-ridge.csv\n",
+ "50000-skeleton_False-binarize_True_pearson-ridge.csv\n",
+ "50000-skeleton_True-binarize_False_lognorm-ridge.csv\n",
+ "50000-skeleton_True-binarize_False_pearson-ridge.csv\n",
+ "50000-skeleton_True-binarize_True_lognorm-ridge.csv\n",
+ "50000-skeleton_True-binarize_True_pearson-ridge.csv\n"
+ ]
}
],
"source": [
- "df_scores = pd.read_csv(f\"resources/scores/hvg/skeleton_False/scgen_pearson-ridge.csv\", index_col=0)\n",
- "df_all_n = (df_scores-df_scores.min(axis=0))/(df_scores.max(axis=0)-df_scores.min(axis=0))\n",
- "df_scores['rank'] = df_all_n.mean(axis=1).rank(ascending=False).astype(int)\n",
- "df_scores"
+ "!ls resources/scores/"
]
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
- "\n",
+ "\n",
" \n",
" \n",
" | \n",
- " S1 | \n",
- " S2 | \n",
- " static-theta-0.0 | \n",
- " static-theta-0.5 | \n",
- " rank | \n",
+ " S1 | \n",
+ " S2 | \n",
+ " static-theta-0.0 | \n",
+ " static-theta-0.5 | \n",
+ " static-theta-1.0 | \n",
+ " rank | \n",
"
\n",
" \n",
" \n",
" \n",
- " collectri | \n",
- " -0.100238 | \n",
- " -0.211182 | \n",
- " 0.485506 | \n",
- " 0.457259 | \n",
- " 11 | \n",
- "
\n",
- " \n",
- " negative_control | \n",
- " -0.044574 | \n",
- " -0.045158 | \n",
- " 0.359805 | \n",
- " 0.438451 | \n",
- " 10 | \n",
- "
\n",
- " \n",
- " positive_control | \n",
- " 0.197129 | \n",
- " 0.578822 | \n",
- " 0.872003 | \n",
- " 0.595489 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " pearson_corr | \n",
- " 0.273443 | \n",
- " 0.516343 | \n",
- " 0.782978 | \n",
- " 0.538252 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " portia | \n",
- " 0.263310 | \n",
- " 0.357006 | \n",
- " 0.566365 | \n",
- " 0.507570 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " ppcor | \n",
- " 0.017954 | \n",
- " 0.159754 | \n",
- " 0.468049 | \n",
- " 0.454995 | \n",
- " 9 | \n",
- "
\n",
- " \n",
- " grnboost2 | \n",
- " 0.421936 | \n",
- " 0.489322 | \n",
- " 0.788931 | \n",
- " 0.629471 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " scenic | \n",
- " 0.168006 | \n",
- " 0.218916 | \n",
- " 0.756965 | \n",
- " 0.565434 | \n",
- " 5 | \n",
- "
\n",
- " \n",
- " granie | \n",
- " 0.083298 | \n",
- " 0.106012 | \n",
- " 0.194164 | \n",
- " 0.363425 | \n",
- " 12 | \n",
- "
\n",
- " \n",
- " scglue | \n",
- " 0.080857 | \n",
- " 0.293630 | \n",
- " 0.660357 | \n",
- " 0.480734 | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " celloracle | \n",
- " 0.209151 | \n",
- " 0.291478 | \n",
- " 0.690099 | \n",
- " 0.576343 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " figr | \n",
- " 0.113645 | \n",
- " 0.193131 | \n",
- " 0.428032 | \n",
- " 0.465268 | \n",
- " 8 | \n",
+ " collectri | \n",
+ " -0.052885 | \n",
+ " -0.162282 | \n",
+ " 0.241939 | \n",
+ " 0.311653 | \n",
+ " 0.279391 | \n",
+ " 13 | \n",
+ "
\n",
+ " \n",
+ " negative_control | \n",
+ " -0.038053 | \n",
+ " -0.053455 | \n",
+ " 0.204835 | \n",
+ " 0.298616 | \n",
+ " 0.279320 | \n",
+ " 12 | \n",
+ "
\n",
+ " \n",
+ " positive_control | \n",
+ " 0.285482 | \n",
+ " 0.497354 | \n",
+ " 0.558247 | \n",
+ " 0.437084 | \n",
+ " 0.291442 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " pearson_corr | \n",
+ " 0.227900 | \n",
+ " 0.418176 | \n",
+ " 0.558176 | \n",
+ " 0.433777 | \n",
+ " 0.290445 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " portia | \n",
+ " 0.114365 | \n",
+ " 0.247659 | \n",
+ " 0.463710 | \n",
+ " 0.342678 | \n",
+ " 0.281718 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " ppcor | \n",
+ " -0.009030 | \n",
+ " -0.026782 | \n",
+ " 0.328378 | \n",
+ " 0.315634 | \n",
+ " 0.278937 | \n",
+ " 10 | \n",
+ "
\n",
+ " \n",
+ " grnboost2 | \n",
+ " 0.277384 | \n",
+ " 0.388048 | \n",
+ " 0.583035 | \n",
+ " 0.501260 | \n",
+ " 0.305520 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " scenic | \n",
+ " 0.132473 | \n",
+ " 0.197608 | \n",
+ " 0.530981 | \n",
+ " 0.448723 | \n",
+ " 0.308812 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " granie | \n",
+ " 0.065566 | \n",
+ " 0.088314 | \n",
+ " 0.166634 | \n",
+ " 0.254159 | \n",
+ " 0.268779 | \n",
+ " 11 | \n",
+ "
\n",
+ " \n",
+ " scglue | \n",
+ " 0.054329 | \n",
+ " 0.253396 | \n",
+ " 0.477133 | \n",
+ " 0.334458 | \n",
+ " 0.282163 | \n",
+ " 8 | \n",
+ "
\n",
+ " \n",
+ " celloracle | \n",
+ " 0.176059 | \n",
+ " 0.240599 | \n",
+ " 0.578366 | \n",
+ " 0.468557 | \n",
+ " 0.300973 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " figr | \n",
+ " 0.097069 | \n",
+ " 0.160378 | \n",
+ " 0.308434 | \n",
+ " 0.353000 | \n",
+ " 0.285211 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ " scenicplus | \n",
+ " 0.275561 | \n",
+ " 0.351945 | \n",
+ " 0.631425 | \n",
+ " 0.515292 | \n",
+ " 0.315756 | \n",
+ " 1 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 8,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "df_scores = pd.read_csv(f\"resources/scores/full/skeleton_True/scgen_pearson-ridge.csv\", index_col=0)\n",
+ "df_scores = pd.read_csv(f\"resources/scores/50000-skeleton_True-binarize_True_pearson-ridge.csv\", index_col=0)\n",
"df_all_n = (df_scores-df_scores.min(axis=0))/(df_scores.max(axis=0)-df_scores.min(axis=0))\n",
"df_scores['rank'] = df_all_n.mean(axis=1).rank(ascending=False).astype(int)\n",
"df_scores.style.background_gradient()"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Format resourcs used"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 19,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(
\n",
- ""
- ],
- "text/plain": [
- " JobID JobName AllocCPUS Elapsed State MaxRSS \\\n",
- "portia 7744548.bat+ batch 20 0.153611 COMPLETED 5.854904 \n",
- "grnboost2 7742249.bat+ batch 20 1.568056 COMPLETED 3.067471 \n",
- "scenic 7742283.bat+ batch 20 1.908056 COMPLETED 30.356461 \n",
- "genie3 7742285.bat+ batch 20 16.682500 COMPLETED 13.105103 \n",
- "ppcor 7742364.bat+ batch 20 0.556667 COMPLETED 3.909119 \n",
- "scglue 7742343.bat+ batch 20 4.380278 FAILED 29.917423 \n",
- "\n",
- " MaxVMSize \n",
- "portia 6.284901 \n",
- "grnboost2 3.563801 \n",
- "scenic 32.573463 \n",
- "genie3 13.563530 \n",
- "ppcor 4.283978 \n",
- "scglue 35.933720 "
- ]
- },
- "execution_count": 14,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "if False: # HVGs: extract resources local jobs\n",
- " job_ids_dict_hvg = {\n",
- " 'portia': 7744548,\n",
- " 'grnboost2': 7742249,\n",
- " 'scenic': 7742283,\n",
- " 'genie3': 7742285,\n",
- " 'ppcor': 7742364,\n",
- " 'scglue': 7742343,\n",
- " }\n",
- " \n",
- " df_resources = process_trace_local(job_ids_dict_hvg)\n",
- " df_resources\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Merge scores with resources"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [
- {
- "ename": "FileNotFoundError",
- "evalue": "[Errno 2] No such file or directory: 'resources/results/scores/d0_hvg/scgen_pearson-ridge.csv'",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[20], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m df_res \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mresources/results/trace/trace_hvg.csv\u001b[39m\u001b[38;5;124m'\u001b[39m, index_col\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m)\n\u001b[0;32m----> 2\u001b[0m df_scores \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mresources/results/scores/d0_hvg/scgen_pearson-ridge.csv\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex_col\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1026\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 1013\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m 1014\u001b[0m dialect,\n\u001b[1;32m 1015\u001b[0m delimiter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1022\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[1;32m 1023\u001b[0m )\n\u001b[1;32m 1024\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m-> 1026\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:620\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 617\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 619\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 620\u001b[0m parser \u001b[38;5;241m=\u001b[39m \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 622\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[1;32m 623\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n",
- "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1620\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1617\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 1619\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1620\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1880\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1878\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[1;32m 1879\u001b[0m mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1880\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1881\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1882\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1883\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1884\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1885\u001b[0m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmemory_map\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1886\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1887\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding_errors\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstrict\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1888\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstorage_options\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1889\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1890\u001b[0m 
\u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1891\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n",
- "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/common.py:873\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 868\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 869\u001b[0m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[1;32m 870\u001b[0m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[1;32m 871\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[1;32m 872\u001b[0m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[0;32m--> 873\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 874\u001b[0m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 875\u001b[0m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 876\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 877\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 878\u001b[0m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 879\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 880\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 881\u001b[0m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[1;32m 882\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n",
- "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'resources/results/scores/d0_hvg/scgen_pearson-ridge.csv'"
- ]
- }
- ],
- "source": [
- "df_res = pd.read_csv('resources/results/trace/trace_hvg.csv', index_col=0)\n",
- "df_scores = pd.read_csv('resources/results/scores/d0_hvg/scgen_pearson-ridge.csv', index_col=0)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " method_name | \n",
- " S1 | \n",
- " S2 | \n",
- " static-theta-0.0 | \n",
- " static-theta-0.5 | \n",
- " overall_score | \n",
- " Peak memory (GB) | \n",
- " Duration (hour) | \n",
+ " method_name | \n",
+ " S1 | \n",
+ " S2 | \n",
+ " static-theta-0.0 | \n",
+ " static-theta-0.5 | \n",
+ " static-theta-1.0 | \n",
+ " overall_score | \n",
+ " Duration (hour) | \n",
+ " Peak memory (GB) | \n",
"
\n",
" \n",
" \n",
@@ -944,9 +746,10 @@
" collectri | \n",
" 0.000000 | \n",
" 0.000000 | \n",
- " 0.500809 | \n",
- " 0.086811 | \n",
- " 0.146905 | \n",
+ " 0.162018 | \n",
+ " 0.220172 | \n",
+ " 0.225904 | \n",
+ " 0.121619 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" \n",
@@ -955,121 +758,144 @@
" negative_control | \n",
" 0.000000 | \n",
" 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
+ " 0.082188 | \n",
+ " 0.170247 | \n",
+ " 0.224394 | \n",
+ " 0.095366 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" \n",
" \n",
" 2 | \n",
" positive_control | \n",
- " 0.517356 | \n",
" 1.000000 | \n",
- " 0.571681 | \n",
- " 0.763590 | \n",
- " 0.713157 | \n",
- " 4.900000 | \n",
- " 0.075278 | \n",
+ " 1.000000 | \n",
+ " 0.842557 | \n",
+ " 0.700505 | \n",
+ " 0.482437 | \n",
+ " 0.805100 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
"
\n",
" \n",
" 3 | \n",
" pearson_corr | \n",
- " 0.706973 | \n",
- " 0.879884 | \n",
- " 0.946418 | \n",
- " 0.354523 | \n",
- " 0.721949 | \n",
- " 0.975100 | \n",
- " 0.072500 | \n",
+ " 0.798299 | \n",
+ " 0.840802 | \n",
+ " 0.842404 | \n",
+ " 0.687843 | \n",
+ " 0.461208 | \n",
+ " 0.726111 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
"
\n",
" \n",
" 4 | \n",
" portia | \n",
- " 0.390888 | \n",
- " 0.392604 | \n",
- " 0.435863 | \n",
- " 0.117372 | \n",
- " 0.334182 | \n",
- " 5.854904 | \n",
- " 0.153611 | \n",
+ " 0.400603 | \n",
+ " 0.497954 | \n",
+ " 0.639161 | \n",
+ " 0.338981 | \n",
+ " 0.275428 | \n",
+ " 0.430425 | \n",
+ " 2.491111 | \n",
+ " 55.685230 | \n",
"
\n",
" \n",
" 5 | \n",
" ppcor | \n",
- " 0.059957 | \n",
- " 0.162584 | \n",
- " 0.342733 | \n",
- " 0.038118 | \n",
- " 0.150848 | \n",
- " 3.909119 | \n",
- " 0.556667 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.347992 | \n",
+ " 0.235417 | \n",
+ " 0.216236 | \n",
+ " 0.159929 | \n",
+ " 13.425833 | \n",
+ " 64.136433 | \n",
"
\n",
" \n",
" 6 | \n",
- " genie3 | \n",
- " 0.977976 | \n",
- " 0.847163 | \n",
- " 0.952597 | \n",
- " 0.684918 | \n",
- " 0.865663 | \n",
- " 13.105103 | \n",
- " 16.682500 | \n",
+ " grnboost2 | \n",
+ " 0.971633 | \n",
+ " 0.780225 | \n",
+ " 0.895890 | \n",
+ " 0.946268 | \n",
+ " 0.782102 | \n",
+ " 0.875223 | \n",
+ " 7.510556 | \n",
+ " 7.378796 | \n",
"
\n",
" \n",
" 7 | \n",
- " grnboost2 | \n",
- " 1.000000 | \n",
- " 0.794475 | \n",
- " 1.000000 | \n",
- " 1.000000 | \n",
- " 0.948619 | \n",
- " 3.067471 | \n",
- " 1.568056 | \n",
+ " scenic | \n",
+ " 0.464032 | \n",
+ " 0.397320 | \n",
+ " 0.783895 | \n",
+ " 0.745078 | \n",
+ " 0.852167 | \n",
+ " 0.648498 | \n",
+ " 24.008611 | \n",
+ " 35.954300 | \n",
"
\n",
" \n",
" 8 | \n",
- " scenic | \n",
- " 0.387244 | \n",
- " 0.370915 | \n",
- " 0.691114 | \n",
- " 0.662750 | \n",
- " 0.528006 | \n",
- " 30.356461 | \n",
- " 1.908056 | \n",
+ " granie | \n",
+ " 0.229666 | \n",
+ " 0.177568 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.081447 | \n",
+ " 1.012038 | \n",
+ " 41.000000 | \n",
"
\n",
" \n",
" 9 | \n",
" scglue | \n",
- " 0.205518 | \n",
- " 0.412663 | \n",
- " 0.431359 | \n",
- " 0.204913 | \n",
- " 0.313613 | \n",
- " 29.917423 | \n",
- " 4.380278 | \n",
+ " 0.190305 | \n",
+ " 0.509488 | \n",
+ " 0.668041 | \n",
+ " 0.307502 | \n",
+ " 0.284899 | \n",
+ " 0.392047 | \n",
+ " 11.097500 | \n",
+ " 61.677879 | \n",
"
\n",
" \n",
" 10 | \n",
" celloracle | \n",
- " 0.569235 | \n",
- " 0.538077 | \n",
- " 0.757183 | \n",
- " 0.719505 | \n",
- " 0.646000 | \n",
- " 14.900000 | \n",
- " 1.472222 | \n",
+ " 0.616708 | \n",
+ " 0.483758 | \n",
+ " 0.885844 | \n",
+ " 0.821030 | \n",
+ " 0.685314 | \n",
+ " 0.698531 | \n",
+ " 3.765000 | \n",
+ " 41.601166 | \n",
"
\n",
" \n",
" 11 | \n",
- " pearson_causal | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.974700 | \n",
- " 0.064167 | \n",
+ " figr | \n",
+ " 0.340016 | \n",
+ " 0.322462 | \n",
+ " 0.305083 | \n",
+ " 0.378510 | \n",
+ " 0.349787 | \n",
+ " 0.339172 | \n",
+ " 6.731667 | \n",
+ " 225.208725 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " scenicplus | \n",
+ " 0.965246 | \n",
+ " 0.707635 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 0.934576 | \n",
+ " 11.740556 | \n",
+ " 131.342854 | \n",
"
\n",
" \n",
"
\n",
@@ -1077,35 +903,37 @@
],
"text/plain": [
" method_name S1 S2 static-theta-0.0 static-theta-0.5 \\\n",
- "0 collectri 0.000000 0.000000 0.500809 0.086811 \n",
- "1 negative_control 0.000000 0.000000 0.000000 0.000000 \n",
- "2 positive_control 0.517356 1.000000 0.571681 0.763590 \n",
- "3 pearson_corr 0.706973 0.879884 0.946418 0.354523 \n",
- "4 portia 0.390888 0.392604 0.435863 0.117372 \n",
- "5 ppcor 0.059957 0.162584 0.342733 0.038118 \n",
- "6 genie3 0.977976 0.847163 0.952597 0.684918 \n",
- "7 grnboost2 1.000000 0.794475 1.000000 1.000000 \n",
- "8 scenic 0.387244 0.370915 0.691114 0.662750 \n",
- "9 scglue 0.205518 0.412663 0.431359 0.204913 \n",
- "10 celloracle 0.569235 0.538077 0.757183 0.719505 \n",
- "11 pearson_causal 0.000000 0.000000 0.000000 0.000000 \n",
+ "0 collectri 0.000000 0.000000 0.162018 0.220172 \n",
+ "1 negative_control 0.000000 0.000000 0.082188 0.170247 \n",
+ "2 positive_control 1.000000 1.000000 0.842557 0.700505 \n",
+ "3 pearson_corr 0.798299 0.840802 0.842404 0.687843 \n",
+ "4 portia 0.400603 0.497954 0.639161 0.338981 \n",
+ "5 ppcor 0.000000 0.000000 0.347992 0.235417 \n",
+ "6 grnboost2 0.971633 0.780225 0.895890 0.946268 \n",
+ "7 scenic 0.464032 0.397320 0.783895 0.745078 \n",
+ "8 granie 0.229666 0.177568 0.000000 0.000000 \n",
+ "9 scglue 0.190305 0.509488 0.668041 0.307502 \n",
+ "10 celloracle 0.616708 0.483758 0.885844 0.821030 \n",
+ "11 figr 0.340016 0.322462 0.305083 0.378510 \n",
+ "12 scenicplus 0.965246 0.707635 1.000000 1.000000 \n",
"\n",
- " overall_score Peak memory (GB) Duration (hour) \n",
- "0 0.146905 0.000000 0.000000 \n",
- "1 0.000000 0.000000 0.000000 \n",
- "2 0.713157 4.900000 0.075278 \n",
- "3 0.721949 0.975100 0.072500 \n",
- "4 0.334182 5.854904 0.153611 \n",
- "5 0.150848 3.909119 0.556667 \n",
- "6 0.865663 13.105103 16.682500 \n",
- "7 0.948619 3.067471 1.568056 \n",
- "8 0.528006 30.356461 1.908056 \n",
- "9 0.313613 29.917423 4.380278 \n",
- "10 0.646000 14.900000 1.472222 \n",
- "11 0.000000 0.974700 0.064167 "
+ " static-theta-1.0 overall_score Duration (hour) Peak memory (GB) \n",
+ "0 0.225904 0.121619 0.000000 0.000000 \n",
+ "1 0.224394 0.095366 0.000000 0.000000 \n",
+ "2 0.482437 0.805100 0.000000 0.000000 \n",
+ "3 0.461208 0.726111 0.000000 0.000000 \n",
+ "4 0.275428 0.430425 2.491111 55.685230 \n",
+ "5 0.216236 0.159929 13.425833 64.136433 \n",
+ "6 0.782102 0.875223 7.510556 7.378796 \n",
+ "7 0.852167 0.648498 24.008611 35.954300 \n",
+ "8 0.000000 0.081447 1.012038 41.000000 \n",
+ "9 0.284899 0.392047 11.097500 61.677879 \n",
+ "10 0.685314 0.698531 3.765000 41.601166 \n",
+ "11 0.349787 0.339172 6.731667 225.208725 \n",
+ "12 1.000000 0.934576 11.740556 131.342854 "
]
},
- "execution_count": 14,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -1135,7 +963,7 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
@@ -1155,11 +983,11 @@
"\u001b[36mℹ\u001b[39m Please use `whereami::thisfile()` instead. \n",
"\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[1m\u001b[22mNew names:\n",
"\u001b[36m•\u001b[39m `` -> `...1`\n",
- "\u001b[1mRows: \u001b[22m\u001b[34m12\u001b[39m \u001b[1mColumns: \u001b[22m\u001b[34m13\u001b[39m\n",
+ "\u001b[1mRows: \u001b[22m\u001b[34m13\u001b[39m \u001b[1mColumns: \u001b[22m\u001b[34m14\u001b[39m\n",
"\u001b[36m──\u001b[39m \u001b[1mColumn specification\u001b[22m \u001b[36m────────────────────────────────────────────────────────\u001b[39m\n",
"\u001b[1mDelimiter:\u001b[22m \"\\t\"\n",
"\u001b[31mchr\u001b[39m (1): method_name\n",
- "\u001b[32mdbl\u001b[39m (12): ...1, S1, S2, static-theta-0.0, static-theta-0.5, overall_score, P...\n",
+ "\u001b[32mdbl\u001b[39m (13): ...1, S1, S2, static-theta-0.0, static-theta-0.5, static-theta-1.0...\n",
"\n",
"\u001b[36mℹ\u001b[39m Use `spec()` to retrieve the full column specification for this data.\n",
"\u001b[36mℹ\u001b[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.\n",
@@ -1188,8 +1016,8 @@
],
"source": [
"\n",
- "summary_file = \"output/summary_d0_hvg.tsv\"\n",
- "summary_figure = \"output/summary_d0_hvg_figure.pdf\"\n",
+ "summary_file = \"output/summary.tsv\"\n",
+ "summary_figure = \"output/summary_figure.pdf\"\n",
"\n",
"df_all['memory_log'] = np.log(df_all['Peak memory (GB)']+1)\n",
"df_all['memory_log'] = np.max(df_all['memory_log'])-df_all['memory_log']\n",
@@ -1201,1373 +1029,1036 @@
"df_all[\"duration_str\"] = df_all['Duration (hour)'].round(1).astype(str)\n",
"df_all['memory_str'] = df_all['Peak memory (GB)'].round(1).astype(str)\n",
"\n",
- "\n",
"df_all.to_csv(summary_file, sep='\\t')\n",
"\n",
- "!Rscript ../grn_benchmark/src/metrics_figure.R {summary_file} {summary_figure}\n"
+ "!Rscript ../grn_benchmark/src/summary_figure.R {summary_file} {summary_figure}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "# All layers"
+ "# Robustness analysis"
]
},
{
"cell_type": "code",
- "execution_count": 60,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
- "data": {
- "text/plain": [
- "array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])"
- ]
- },
- "execution_count": 60,
- "metadata": {},
- "output_type": "execute_result"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Submitted batch job 7765072\n"
+ ]
}
],
"source": [
- "np.arange(0, 1, .1)"
+ "if True:\n",
+ " !sbatch scripts/sbatch/robustness_analysis.sh # !python src/robustness_analysis/script_all.py\n",
+ "base_dir = 'resources/results/robustness_analysis'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def format_robustness_results(base_dir, noise_type='net'):\n",
+ " degrees = [0, 10, 20, 50, 100]\n",
+ " reg1_metric = 'S1'\n",
+ " reg2_metric = 'static-theta-0.5'\n",
+ " for i, degree in enumerate(degrees):\n",
+ " df = pd.read_csv(f'{base_dir}/{noise_type}-{degree}-scores.csv',index_col=0)\n",
+ " df_reg1 = df.loc[:, [reg1_metric]].rename(columns={reg1_metric:degree})\n",
+ " df_reg2 = df.loc[:, [reg2_metric]].rename(columns={reg2_metric:degree})\n",
+ " \n",
+ " if i == 0:\n",
+ " reg1_scores = df_reg1\n",
+ " reg2_scores = df_reg2\n",
+ " else:\n",
+ " reg1_scores = pd.concat([reg1_scores, df_reg1], axis=1)\n",
+ " reg2_scores = pd.concat([reg2_scores, df_reg2], axis=1)\n",
+ " \n",
+ " reg1_scores = reg1_scores.T\n",
+ " reg2_scores = reg2_scores.T\n",
+ " return reg1_scores, reg2_scores"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Permute net"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# net \n",
+ "noise_type = 'net'\n",
+ "reg1_scores, reg2_scores = format_robustness_results(base_dir, noise_type=noise_type)"
]
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
- "
\n",
+ "\n",
" \n",
" \n",
" | \n",
- " collectri | \n",
- " negative_control | \n",
- " positive_control | \n",
- " pearson_corr | \n",
- " portia | \n",
- " ppcor | \n",
- " genie3 | \n",
- " grnboost2 | \n",
- " scenic | \n",
- " scglue | \n",
- " celloracle | \n",
+ " collectri | \n",
+ " negative_control | \n",
+ " positive_control | \n",
+ " pearson_corr | \n",
+ " portia | \n",
+ " ppcor | \n",
+ " grnboost2 | \n",
+ " scenic | \n",
+ " granie | \n",
+ " scglue | \n",
+ " celloracle | \n",
+ " figr | \n",
+ " scenicplus | \n",
"
\n",
" \n",
" \n",
" \n",
- " lognorm | \n",
- " -0.050548 | \n",
- " -0.042234 | \n",
- " 0.082101 | \n",
- " 0.046997 | \n",
- " 0.014856 | \n",
- " 0.003078 | \n",
- " 0.073206 | \n",
- " 0.091619 | \n",
- " 0.068810 | \n",
- " 0.027315 | \n",
- " 0.068165 | \n",
- "
\n",
- " \n",
- " pearson | \n",
- " -0.095474 | \n",
- " -0.041130 | \n",
- " 0.163470 | \n",
- " 0.217511 | \n",
- " 0.113185 | \n",
- " 0.016686 | \n",
- " 0.295920 | \n",
- " 0.299160 | \n",
- " 0.117241 | \n",
- " 0.061848 | \n",
- " 0.171422 | \n",
- "
\n",
- " \n",
- " seurat_lognorm | \n",
- " -0.052159 | \n",
- " -0.041432 | \n",
- " 0.087152 | \n",
- " 0.053505 | \n",
- " 0.017466 | \n",
- " 0.003723 | \n",
- " 0.081703 | \n",
- " 0.105108 | \n",
- " 0.079217 | \n",
- " 0.031597 | \n",
- " 0.078648 | \n",
- "
\n",
- " \n",
- " seurat_pearson | \n",
- " -0.095343 | \n",
- " -0.041747 | \n",
- " 0.174773 | \n",
- " 0.219817 | \n",
- " 0.108111 | \n",
- " 0.016615 | \n",
- " 0.293678 | \n",
- " 0.301832 | \n",
- " 0.122765 | \n",
- " 0.064890 | \n",
- " 0.181489 | \n",
- "
\n",
- " \n",
- " scgen_lognorm | \n",
- " -0.059849 | \n",
- " -0.041816 | \n",
- " 0.160466 | \n",
- " 0.094411 | \n",
- " 0.055487 | \n",
- " 0.008795 | \n",
- " 0.162319 | \n",
- " 0.219727 | \n",
- " 0.147812 | \n",
- " 0.060572 | \n",
- " 0.150424 | \n",
- "
\n",
- " \n",
- " scgen_pearson | \n",
- " -0.100238 | \n",
- " -0.039305 | \n",
- " 0.197129 | \n",
- " 0.269379 | \n",
- " 0.148941 | \n",
- " 0.022846 | \n",
- " 0.372641 | \n",
- " 0.381032 | \n",
- " 0.147553 | \n",
- " 0.078309 | \n",
- " 0.216897 | \n",
+ " 0 | \n",
+ " -0.052885 | \n",
+ " -0.038053 | \n",
+ " 0.285482 | \n",
+ " 0.227900 | \n",
+ " 0.114365 | \n",
+ " -0.009030 | \n",
+ " 0.277384 | \n",
+ " 0.132838 | \n",
+ " 0.065566 | \n",
+ " 0.054329 | \n",
+ " 0.177773 | \n",
+ " 0.097069 | \n",
+ " 0.275561 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " -0.063194 | \n",
+ " -0.037703 | \n",
+ " 0.258300 | \n",
+ " 0.201382 | \n",
+ " 0.094161 | \n",
+ " -0.016007 | \n",
+ " 0.246858 | \n",
+ " 0.117373 | \n",
+ " 0.052529 | \n",
+ " 0.046615 | \n",
+ " 0.159076 | \n",
+ " 0.085768 | \n",
+ " 0.259247 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " -0.068023 | \n",
+ " -0.038826 | \n",
+ " 0.232850 | \n",
+ " 0.188608 | \n",
+ " 0.076579 | \n",
+ " -0.025485 | \n",
+ " 0.213148 | \n",
+ " 0.103506 | \n",
+ " 0.043782 | \n",
+ " 0.037751 | \n",
+ " 0.145942 | \n",
+ " 0.079237 | \n",
+ " 0.243952 | \n",
+ "
\n",
+ " \n",
+ " 50 | \n",
+ " -0.083635 | \n",
+ " -0.035601 | \n",
+ " 0.180567 | \n",
+ " 0.147512 | \n",
+ " 0.026785 | \n",
+ " -0.041553 | \n",
+ " 0.147266 | \n",
+ " 0.060212 | \n",
+ " nan | \n",
+ " nan | \n",
+ " nan | \n",
+ " nan | \n",
+ " nan | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " -0.084617 | \n",
+ " -0.038591 | \n",
+ " -0.009365 | \n",
+ " -0.045844 | \n",
+ " -0.052908 | \n",
+ " -0.064681 | \n",
+ " -0.057610 | \n",
+ " -0.016075 | \n",
+ " -0.004035 | \n",
+ " -0.045894 | \n",
+ " -0.019244 | \n",
+ " -0.013468 | \n",
+ " -0.006252 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 17,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "base_dir = 'resources/scores/d0_hvg'\n",
- "layers = ['lognorm','pearson', 'seurat_lognorm', 'seurat_pearson', 'scgen_lognorm', 'scgen_pearson']\n",
- "reg_type = 'ridge'\n",
- "reg1_metric = 'S1'\n",
- "reg2_metric = 'static-theta-0.5'\n",
- "for i, layer in enumerate(layers):\n",
- " df = pd.read_csv(f'{base_dir}/{layer}-{reg_type}.csv',index_col=0)\n",
- " df_reg1 = df.loc[:, [reg1_metric]].rename(columns={reg1_metric:layer})\n",
- " df_reg2 = df.loc[:, [reg2_metric]].rename(columns={reg2_metric:layer})\n",
- " \n",
- " if i == 0:\n",
- " reg1_scores_layers = df_reg1\n",
- " reg2_scores_layers = df_reg2\n",
- " else:\n",
- " reg1_scores_layers = pd.concat([reg1_scores_layers, df_reg1], axis=1)\n",
- " reg2_scores_layers = pd.concat([reg2_scores_layers, df_reg2], axis=1)\n",
- " \n",
- "reg1_scores_layers = reg1_scores_layers.T\n",
- "reg2_scores_layers = reg2_scores_layers.T\n",
- "\n",
- "reg1_scores_layers.to_csv('../grn_benchmark/results_folder/scores/reg1_scores_layers_hvgs.csv')\n",
- "reg2_scores_layers.to_csv('../grn_benchmark/results_folder/scores/reg2_scores_layers_hvgs.csv')\n",
- "\n",
- "reg1_scores_layers.style.background_gradient()\n"
+ "reg1_scores.style.background_gradient()"
]
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
- "\n",
+ "\n",
" \n",
" \n",
" | \n",
- " collectri | \n",
- " negative_control | \n",
- " positive_control | \n",
- " pearson_corr | \n",
- " portia | \n",
- " ppcor | \n",
- " grnboost2 | \n",
- " scenic | \n",
- " granie | \n",
- " scglue | \n",
- " celloracle | \n",
+ " collectri | \n",
+ " negative_control | \n",
+ " positive_control | \n",
+ " pearson_corr | \n",
+ " portia | \n",
+ " ppcor | \n",
+ " grnboost2 | \n",
+ " scenic | \n",
+ " granie | \n",
+ " scglue | \n",
+ " celloracle | \n",
+ " figr | \n",
+ " scenicplus | \n",
"
\n",
" \n",
" \n",
" \n",
- " lognorm | \n",
- " -0.050548 | \n",
- " -0.043757 | \n",
- " 0.082101 | \n",
- " 0.062031 | \n",
- " 0.010746 | \n",
- " 0.001611 | \n",
- " 0.118505 | \n",
- " 0.075060 | \n",
- " 0.044887 | \n",
- " 0.035910 | \n",
- " 0.069066 | \n",
- "
\n",
- " \n",
- " pearson | \n",
- " -0.095474 | \n",
- " -0.043600 | \n",
- " 0.163470 | \n",
- " 0.222177 | \n",
- " 0.082329 | \n",
- " 0.014658 | \n",
- " 0.332626 | \n",
- " 0.130075 | \n",
- " 0.070551 | \n",
- " 0.079671 | \n",
- " 0.167634 | \n",
- "
\n",
- " \n",
- " seurat_lognorm | \n",
- " -0.052159 | \n",
- " -0.044382 | \n",
- " 0.087152 | \n",
- " 0.068895 | \n",
- " 0.011506 | \n",
- " 0.002132 | \n",
- " 0.135575 | \n",
- " 0.080760 | \n",
- " 0.049640 | \n",
- " 0.039661 | \n",
- " 0.080605 | \n",
- "
\n",
- " \n",
- " seurat_pearson | \n",
- " -0.095343 | \n",
- " -0.042833 | \n",
- " 0.174773 | \n",
- " 0.226611 | \n",
- " 0.087287 | \n",
- " 0.016914 | \n",
- " 0.339841 | \n",
- " 0.136727 | \n",
- " 0.075773 | \n",
- " 0.079768 | \n",
- " 0.174873 | \n",
- "
\n",
- " \n",
- " scgen_lognorm | \n",
- " -0.059849 | \n",
- " -0.044936 | \n",
- " 0.160466 | \n",
- " 0.117994 | \n",
- " 0.029785 | \n",
- " 0.005762 | \n",
- " 0.263282 | \n",
- " 0.152383 | \n",
- " 0.090862 | \n",
- " 0.071800 | \n",
- " 0.153270 | \n",
- "
\n",
- " \n",
- " scgen_pearson | \n",
- " -0.100238 | \n",
- " -0.044574 | \n",
- " 0.197129 | \n",
- " 0.273443 | \n",
- " 0.101110 | \n",
- " 0.017954 | \n",
- " 0.421936 | \n",
- " 0.172085 | \n",
- " 0.083298 | \n",
- " 0.099859 | \n",
- " 0.209151 | \n",
+ " 0 | \n",
+ " 0.314904 | \n",
+ " 0.298416 | \n",
+ " 0.438187 | \n",
+ " 0.434587 | \n",
+ " 0.337533 | \n",
+ " 0.316130 | \n",
+ " 0.501373 | \n",
+ " 0.447392 | \n",
+ " 0.268367 | \n",
+ " 0.335675 | \n",
+ " 0.467258 | \n",
+ " 0.354542 | \n",
+ " 0.514954 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 0.310607 | \n",
+ " 0.300799 | \n",
+ " 0.433080 | \n",
+ " 0.427072 | \n",
+ " 0.337533 | \n",
+ " 0.314340 | \n",
+ " 0.492975 | \n",
+ " 0.439982 | \n",
+ " 0.263060 | \n",
+ " 0.334209 | \n",
+ " 0.453522 | \n",
+ " 0.356273 | \n",
+ " 0.506758 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 0.312346 | \n",
+ " 0.298023 | \n",
+ " 0.428219 | \n",
+ " 0.416558 | \n",
+ " 0.334752 | \n",
+ " 0.307381 | \n",
+ " 0.481193 | \n",
+ " 0.427551 | \n",
+ " 0.268367 | \n",
+ " 0.330640 | \n",
+ " 0.444836 | \n",
+ " 0.351414 | \n",
+ " 0.498718 | \n",
+ "
\n",
+ " \n",
+ " 50 | \n",
+ " 0.308445 | \n",
+ " 0.300843 | \n",
+ " 0.413975 | \n",
+ " 0.401897 | \n",
+ " 0.322256 | \n",
+ " 0.300537 | \n",
+ " 0.439232 | \n",
+ " 0.399823 | \n",
+ " nan | \n",
+ " nan | \n",
+ " nan | \n",
+ " nan | \n",
+ " nan | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " 0.295797 | \n",
+ " 0.303241 | \n",
+ " 0.379001 | \n",
+ " 0.345088 | \n",
+ " 0.291961 | \n",
+ " 0.286424 | \n",
+ " 0.293421 | \n",
+ " 0.317192 | \n",
+ " 0.318627 | \n",
+ " 0.289704 | \n",
+ " 0.310580 | \n",
+ " 0.316354 | \n",
+ " 0.321514 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 21,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "base_dir = 'resources/scores/'\n",
- "layers = ['lognorm','pearson', 'seurat_lognorm', 'seurat_pearson', 'scgen_lognorm', 'scgen_pearson']\n",
- "reg_type = 'ridge'\n",
- "reg1_metric = 'S1'\n",
- "reg2_metric = 'static-theta-0.5'\n",
- "for i, layer in enumerate(layers):\n",
- " df = pd.read_csv(f'{base_dir}/{layer}-{reg_type}.csv',index_col=0)\n",
- " df_reg1 = df.loc[:, [reg1_metric]].rename(columns={reg1_metric:layer})\n",
- " df_reg2 = df.loc[:, [reg2_metric]].rename(columns={reg2_metric:layer})\n",
- " \n",
- " if i == 0:\n",
- " reg1_scores_layers = df_reg1\n",
- " reg2_scores_layers = df_reg2\n",
- " else:\n",
- " reg1_scores_layers = pd.concat([reg1_scores_layers, df_reg1], axis=1)\n",
- " reg2_scores_layers = pd.concat([reg2_scores_layers, df_reg2], axis=1)\n",
- " \n",
- "reg1_scores_layers = reg1_scores_layers.T\n",
- "reg2_scores_layers = reg2_scores_layers.T\n",
- "\n",
- "reg1_scores_layers.to_csv('../grn_benchmark/results_folder/scores/reg1_scores_layers.csv')\n",
- "reg2_scores_layers.to_csv('../grn_benchmark/results_folder/scores/reg2_scores_layers.csv')\n",
- "\n",
- "reg1_scores_layers.style.background_gradient()\n"
+ "reg2_scores.style.background_gradient()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Robustness analysis"
+ "## Permute sign"
]
},
{
"cell_type": "code",
- "execution_count": 54,
+ "execution_count": 12,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Submitted batch job 7752429\n"
- ]
- }
- ],
- "source": [
- "# !python src/robustness_analysis/script_all.py\n",
- "if True:\n",
- " !sbatch scripts/sbatch/robustness_analysis.sh"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Permute"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "ename": "FileNotFoundError",
- "evalue": "[Errno 2] No such file or directory: 'resources/results/robustness_analysis/net-0-scores.csv'",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[2], line 8\u001b[0m\n\u001b[1;32m 6\u001b[0m reg2_metric \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstatic-theta-0.5\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, degree \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(degrees):\n\u001b[0;32m----> 8\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mbase_dir\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mnoise_type\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m-\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mdegree\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m-scores.csv\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43mindex_col\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 9\u001b[0m df_reg1 \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mloc[:, [reg1_metric]]\u001b[38;5;241m.\u001b[39mrename(columns\u001b[38;5;241m=\u001b[39m{reg1_metric:degree})\n\u001b[1;32m 10\u001b[0m df_reg2 \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mloc[:, [reg2_metric]]\u001b[38;5;241m.\u001b[39mrename(columns\u001b[38;5;241m=\u001b[39m{reg2_metric:degree})\n",
- "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1026\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 1013\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m 1014\u001b[0m dialect,\n\u001b[1;32m 1015\u001b[0m delimiter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1022\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[1;32m 1023\u001b[0m )\n\u001b[1;32m 1024\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m-> 1026\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:620\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 617\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 619\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 620\u001b[0m parser \u001b[38;5;241m=\u001b[39m \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 622\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[1;32m 623\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n",
- "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1620\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1617\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 1619\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1620\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1880\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1878\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[1;32m 1879\u001b[0m mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1880\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1881\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1882\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1883\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1884\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1885\u001b[0m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmemory_map\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1886\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1887\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding_errors\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstrict\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1888\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstorage_options\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1889\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1890\u001b[0m 
\u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1891\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n",
- "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/common.py:873\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 868\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 869\u001b[0m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[1;32m 870\u001b[0m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[1;32m 871\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[1;32m 872\u001b[0m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[0;32m--> 873\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 874\u001b[0m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 875\u001b[0m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 876\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 877\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 878\u001b[0m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 879\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 880\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 881\u001b[0m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[1;32m 882\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n",
- "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'resources/results/robustness_analysis/net-0-scores.csv'"
- ]
- }
- ],
+ "outputs": [],
"source": [
- "# net \n",
- "noise_type = 'net'\n",
- "base_dir = 'resources/results/robustness_analysis'\n",
- "degrees = [0, 10, 20, 50, 100]\n",
- "reg1_metric = 'S1'\n",
- "reg2_metric = 'static-theta-0.5'\n",
- "for i, degree in enumerate(degrees):\n",
- " df = pd.read_csv(f'{base_dir}/{noise_type}-{degree}-scores.csv',index_col=0)\n",
- " df_reg1 = df.loc[:, [reg1_metric]].rename(columns={reg1_metric:degree})\n",
- " df_reg2 = df.loc[:, [reg2_metric]].rename(columns={reg2_metric:degree})\n",
- " \n",
- " if i == 0:\n",
- " reg1_scores_layers = df_reg1\n",
- " reg2_scores_layers = df_reg2\n",
- " else:\n",
- " reg1_scores_layers = pd.concat([reg1_scores_layers, df_reg1], axis=1)\n",
- " reg2_scores_layers = pd.concat([reg2_scores_layers, df_reg2], axis=1)\n",
- " \n",
- "reg1_scores_layers = reg1_scores_layers.T\n",
- "reg2_scores_layers = reg2_scores_layers.T\n",
- "reg1_scores_layers.style.background_gradient()\n"
+ "noise_type = 'sign'\n",
+ "reg1_scores, reg2_scores = format_robustness_results(base_dir, noise_type=noise_type)"
]
},
{
"cell_type": "code",
- "execution_count": 48,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
- "\n",
+ "\n",
" \n",
" \n",
" | \n",
- " collectri | \n",
- " negative_control | \n",
- " positive_control | \n",
- " pearson_corr | \n",
- " pearson_causal | \n",
- " portia | \n",
- " ppcor | \n",
- " genie3 | \n",
- " grnboost2 | \n",
- " scenic | \n",
- " scglue | \n",
- " celloracle | \n",
+ " collectri | \n",
+ " negative_control | \n",
+ " positive_control | \n",
+ " pearson_corr | \n",
+ " portia | \n",
+ " ppcor | \n",
+ " grnboost2 | \n",
+ " scenic | \n",
+ " granie | \n",
+ " scglue | \n",
+ " celloracle | \n",
+ " figr | \n",
+ " scenicplus | \n",
"
\n",
" \n",
" \n",
" \n",
- " 0 | \n",
- " 0.514896 | \n",
- " 0.505002 | \n",
- " 0.574608 | \n",
- " 0.524232 | \n",
- " 0.560490 | \n",
- " 0.518048 | \n",
- " 0.509874 | \n",
- " 0.576580 | \n",
- " 0.609075 | \n",
- " 0.574294 | \n",
- " 0.527076 | \n",
- " 0.580147 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " 0.515406 | \n",
- " 0.505691 | \n",
- " 0.571882 | \n",
- " 0.523067 | \n",
- " 0.555572 | \n",
- " 0.524393 | \n",
- " 0.516036 | \n",
- " 0.570821 | \n",
- " 0.601759 | \n",
- " 0.565942 | \n",
- " 0.530801 | \n",
- " 0.573236 | \n",
- "
\n",
- " \n",
- " 20 | \n",
- " 0.511172 | \n",
- " 0.504663 | \n",
- " 0.555954 | \n",
- " 0.526195 | \n",
- " 0.552449 | \n",
- " 0.515956 | \n",
- " 0.515292 | \n",
- " 0.569976 | \n",
- " 0.596415 | \n",
- " 0.561619 | \n",
- " 0.526821 | \n",
- " 0.563653 | \n",
- "
\n",
- " \n",
- " 50 | \n",
- " 0.511803 | \n",
- " 0.495854 | \n",
- " 0.552935 | \n",
- " 0.524744 | \n",
- " 0.534764 | \n",
- " 0.516056 | \n",
- " 0.510711 | \n",
- " 0.556661 | \n",
- " 0.581920 | \n",
- " 0.548328 | \n",
- " 0.518366 | \n",
- " 0.542993 | \n",
- "
\n",
- " \n",
- " 100 | \n",
- " 0.505540 | \n",
- " 0.503372 | \n",
- " 0.531005 | \n",
- " 0.513729 | \n",
- " 0.523002 | \n",
- " 0.506760 | \n",
- " 0.513287 | \n",
- " 0.504832 | \n",
- " 0.514585 | \n",
- " 0.504093 | \n",
- " 0.506443 | \n",
- " 0.513158 | \n",
+ " 0 | \n",
+ " -0.052885 | \n",
+ " -0.038053 | \n",
+ " 0.285482 | \n",
+ " 0.227900 | \n",
+ " 0.114365 | \n",
+ " -0.009030 | \n",
+ " 0.277384 | \n",
+ " 0.132916 | \n",
+ " 0.065566 | \n",
+ " 0.054329 | \n",
+ " 0.177773 | \n",
+ " 0.097069 | \n",
+ " 0.275561 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " -0.063294 | \n",
+ " -0.037943 | \n",
+ " 0.244253 | \n",
+ " 0.175927 | \n",
+ " 0.068840 | \n",
+ " -0.026427 | \n",
+ " 0.225111 | \n",
+ " 0.097068 | \n",
+ " 0.043191 | \n",
+ " 0.039099 | \n",
+ " 0.130693 | \n",
+ " 0.066178 | \n",
+ " 0.232978 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " -0.078692 | \n",
+ " -0.037331 | \n",
+ " 0.195138 | \n",
+ " 0.143907 | \n",
+ " 0.030350 | \n",
+ " -0.041580 | \n",
+ " 0.162380 | \n",
+ " 0.059698 | \n",
+ " 0.023835 | \n",
+ " 0.021276 | \n",
+ " 0.078184 | \n",
+ " 0.049052 | \n",
+ " 0.183118 | \n",
+ "
\n",
+ " \n",
+ " 50 | \n",
+ " -0.092569 | \n",
+ " -0.034971 | \n",
+ " -0.018533 | \n",
+ " -0.071464 | \n",
+ " -0.036026 | \n",
+ " -0.065349 | \n",
+ " -0.034382 | \n",
+ " -0.023101 | \n",
+ " -0.002446 | \n",
+ " -0.024859 | \n",
+ " -0.027821 | \n",
+ " -0.014739 | \n",
+ " -0.006185 | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " -0.052885 | \n",
+ " -0.038053 | \n",
+ " 0.285482 | \n",
+ " 0.227900 | \n",
+ " 0.114365 | \n",
+ " -0.009030 | \n",
+ " 0.277384 | \n",
+ " 0.132916 | \n",
+ " 0.065566 | \n",
+ " 0.054329 | \n",
+ " 0.177773 | \n",
+ " 0.097069 | \n",
+ " 0.275561 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 48,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "reg2_scores_layers.style.background_gradient()"
+ "reg1_scores.style.background_gradient()"
]
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
- "\n",
+ "\n",
" \n",
" \n",
" | \n",
- " collectri | \n",
- " negative_control | \n",
- " positive_control | \n",
- " pearson_corr | \n",
- " pearson_causal | \n",
- " portia | \n",
- " ppcor | \n",
- " genie3 | \n",
- " grnboost2 | \n",
- " scenic | \n",
- " scglue | \n",
- " celloracle | \n",
+ " collectri | \n",
+ " negative_control | \n",
+ " positive_control | \n",
+ " pearson_corr | \n",
+ " portia | \n",
+ " ppcor | \n",
+ " grnboost2 | \n",
+ " scenic | \n",
+ " granie | \n",
+ " scglue | \n",
+ " celloracle | \n",
+ " figr | \n",
+ " scenicplus | \n",
"
\n",
" \n",
" \n",
" \n",
- " 0 | \n",
- " -0.100238 | \n",
- " -0.043795 | \n",
- " 0.489147 | \n",
- " 0.238664 | \n",
- " 0.355256 | \n",
- " 0.148941 | \n",
- " 0.022846 | \n",
- " 0.372641 | \n",
- " 0.381032 | \n",
- " 0.147553 | \n",
- " 0.078309 | \n",
- " 0.216897 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " -0.107958 | \n",
- " -0.042199 | \n",
- " 0.433246 | \n",
- " 0.170612 | \n",
- " 0.297354 | \n",
- " 0.103448 | \n",
- " 0.018551 | \n",
- " 0.309243 | \n",
- " 0.320476 | \n",
- " 0.105850 | \n",
- " 0.051565 | \n",
- " 0.148519 | \n",
- "
\n",
- " \n",
- " 20 | \n",
- " -0.129616 | \n",
- " -0.042258 | \n",
- " 0.383169 | \n",
- " 0.109502 | \n",
- " 0.259201 | \n",
- " 0.064159 | \n",
- " -0.001810 | \n",
- " 0.203046 | \n",
- " 0.229562 | \n",
- " 0.067477 | \n",
- " 0.026945 | \n",
- " 0.080443 | \n",
- "
\n",
- " \n",
- " 50 | \n",
- " -0.154785 | \n",
- " -0.040425 | \n",
- " -0.091824 | \n",
- " -0.000377 | \n",
- " -0.188567 | \n",
- " -0.090789 | \n",
- " -0.013036 | \n",
- " -0.114169 | \n",
- " -0.122291 | \n",
- " -0.025912 | \n",
- " -0.010141 | \n",
- " -0.135087 | \n",
- "
\n",
- " \n",
- " 100 | \n",
- " -0.100238 | \n",
- " -0.043795 | \n",
- " 0.489147 | \n",
- " 0.238664 | \n",
- " 0.355256 | \n",
- " 0.148941 | \n",
- " 0.022846 | \n",
- " 0.372641 | \n",
- " 0.381032 | \n",
- " 0.147553 | \n",
- " 0.078309 | \n",
- " 0.216897 | \n",
+ " 0 | \n",
+ " 0.314904 | \n",
+ " 0.298416 | \n",
+ " 0.438187 | \n",
+ " 0.434587 | \n",
+ " 0.343306 | \n",
+ " 0.316130 | \n",
+ " 0.501373 | \n",
+ " 0.447392 | \n",
+ " 0.257013 | \n",
+ " 0.335675 | \n",
+ " 0.467258 | \n",
+ " 0.354542 | \n",
+ " 0.514954 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 0.315233 | \n",
+ " 0.298546 | \n",
+ " 0.438187 | \n",
+ " 0.434587 | \n",
+ " 0.343306 | \n",
+ " 0.316130 | \n",
+ " 0.501373 | \n",
+ " 0.447392 | \n",
+ " 0.257063 | \n",
+ " 0.335675 | \n",
+ " 0.467258 | \n",
+ " 0.354542 | \n",
+ " 0.514954 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 0.315206 | \n",
+ " 0.298645 | \n",
+ " 0.438187 | \n",
+ " 0.434587 | \n",
+ " 0.343306 | \n",
+ " 0.316130 | \n",
+ " 0.501373 | \n",
+ " 0.447392 | \n",
+ " 0.256943 | \n",
+ " 0.335675 | \n",
+ " 0.467258 | \n",
+ " 0.354542 | \n",
+ " 0.514954 | \n",
+ "
\n",
+ " \n",
+ " 50 | \n",
+ " 0.315445 | \n",
+ " 0.299033 | \n",
+ " 0.438187 | \n",
+ " 0.434587 | \n",
+ " 0.343306 | \n",
+ " 0.316130 | \n",
+ " 0.501373 | \n",
+ " 0.447392 | \n",
+ " 0.256891 | \n",
+ " 0.335675 | \n",
+ " 0.467258 | \n",
+ " 0.354542 | \n",
+ " 0.514954 | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " 0.314412 | \n",
+ " 0.298592 | \n",
+ " 0.438187 | \n",
+ " 0.434587 | \n",
+ " 0.343306 | \n",
+ " 0.316130 | \n",
+ " 0.501373 | \n",
+ " 0.447392 | \n",
+ " 0.257129 | \n",
+ " 0.335675 | \n",
+ " 0.467258 | \n",
+ " 0.354542 | \n",
+ " 0.514954 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 3,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "noise_type = 'sign'\n",
- "base_dir = 'resources/results/robustness_analysis'\n",
- "degrees = [0, 10, 20, 50, 100]\n",
- "reg1_metric = 'S1'\n",
- "reg2_metric = 'static-theta-0.5'\n",
- "for i, degree in enumerate(degrees):\n",
- " df = pd.read_csv(f'{base_dir}/{noise_type}-{degree}-scores.csv',index_col=0)\n",
- " df_reg1 = df.loc[:, [reg1_metric]].rename(columns={reg1_metric:degree})\n",
- " df_reg2 = df.loc[:, [reg2_metric]].rename(columns={reg2_metric:degree})\n",
- " \n",
- " if i == 0:\n",
- " reg1_scores_layers = df_reg1\n",
- " reg2_scores_layers = df_reg2\n",
- " else:\n",
- " reg1_scores_layers = pd.concat([reg1_scores_layers, df_reg1], axis=1)\n",
- " reg2_scores_layers = pd.concat([reg2_scores_layers, df_reg2], axis=1)\n",
- " \n",
- "reg1_scores_layers = reg1_scores_layers.T\n",
- "reg2_scores_layers = reg2_scores_layers.T\n",
- "reg1_scores_layers.style.background_gradient()"
+ "reg2_scores.style.background_gradient()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Permute weight"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 15,
"metadata": {},
"outputs": [
{
- "data": {
- "text/html": [
- "\n",
- "\n",
- " \n",
- " \n",
- " | \n",
- " collectri | \n",
- " negative_control | \n",
- " positive_control | \n",
- " pearson_corr | \n",
- " pearson_causal | \n",
- " portia | \n",
- " ppcor | \n",
- " genie3 | \n",
- " grnboost2 | \n",
- " scenic | \n",
- " scglue | \n",
- " celloracle | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 0.514896 | \n",
- " 0.505002 | \n",
- " 0.574608 | \n",
- " 0.524232 | \n",
- " 0.560490 | \n",
- " 0.518048 | \n",
- " 0.509874 | \n",
- " 0.576580 | \n",
- " 0.609075 | \n",
- " 0.574294 | \n",
- " 0.527076 | \n",
- " 0.580147 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " 0.514113 | \n",
- " 0.504708 | \n",
- " 0.574608 | \n",
- " 0.524232 | \n",
- " 0.560490 | \n",
- " 0.518048 | \n",
- " 0.509874 | \n",
- " 0.576580 | \n",
- " 0.609075 | \n",
- " 0.574294 | \n",
- " 0.527076 | \n",
- " 0.580147 | \n",
- "
\n",
- " \n",
- " 20 | \n",
- " 0.513857 | \n",
- " 0.505899 | \n",
- " 0.574608 | \n",
- " 0.524232 | \n",
- " 0.560490 | \n",
- " 0.518048 | \n",
- " 0.509874 | \n",
- " 0.576580 | \n",
- " 0.609075 | \n",
- " 0.574294 | \n",
- " 0.527076 | \n",
- " 0.580147 | \n",
- "
\n",
- " \n",
- " 50 | \n",
- " 0.512082 | \n",
- " 0.506397 | \n",
- " 0.574608 | \n",
- " 0.524232 | \n",
- " 0.560490 | \n",
- " 0.518048 | \n",
- " 0.509874 | \n",
- " 0.576580 | \n",
- " 0.609075 | \n",
- " 0.574294 | \n",
- " 0.527076 | \n",
- " 0.580147 | \n",
- "
\n",
- " \n",
- " 100 | \n",
- " 0.508349 | \n",
- " 0.508202 | \n",
- " 0.574608 | \n",
- " 0.524232 | \n",
- " 0.560490 | \n",
- " 0.518048 | \n",
- " 0.509874 | \n",
- " 0.576580 | \n",
- " 0.609075 | \n",
- " 0.574294 | \n",
- " 0.527076 | \n",
- " 0.580147 | \n",
- "
\n",
- " \n",
- "
\n"
- ],
- "text/plain": [
- ""
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
+ "ename": "FileNotFoundError",
+ "evalue": "[Errno 2] No such file or directory: 'resources/results/robustness_analysis/weight-20-scores.csv'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[15], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m noise_type \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mweight\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 2\u001b[0m reg1_scores, reg2_scores \u001b[38;5;241m=\u001b[39m \u001b[43mformat_robustness_results\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbase_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnoise_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnoise_type\u001b[49m\u001b[43m)\u001b[49m\n",
+ "Cell \u001b[0;32mIn[7], line 6\u001b[0m, in \u001b[0;36mformat_robustness_results\u001b[0;34m(base_dir, noise_type)\u001b[0m\n\u001b[1;32m 4\u001b[0m reg2_metric \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstatic-theta-0.5\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, degree \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(degrees):\n\u001b[0;32m----> 6\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mbase_dir\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mnoise_type\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m-\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mdegree\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m-scores.csv\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43mindex_col\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 7\u001b[0m df_reg1 \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mloc[:, [reg1_metric]]\u001b[38;5;241m.\u001b[39mrename(columns\u001b[38;5;241m=\u001b[39m{reg1_metric:degree})\n\u001b[1;32m 8\u001b[0m df_reg2 \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mloc[:, [reg2_metric]]\u001b[38;5;241m.\u001b[39mrename(columns\u001b[38;5;241m=\u001b[39m{reg2_metric:degree})\n",
+ "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1026\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 1013\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m 1014\u001b[0m dialect,\n\u001b[1;32m 1015\u001b[0m delimiter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1022\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[1;32m 1023\u001b[0m )\n\u001b[1;32m 1024\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m-> 1026\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:620\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 617\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 619\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 620\u001b[0m parser \u001b[38;5;241m=\u001b[39m \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 622\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[1;32m 623\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n",
+ "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1620\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1617\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 1619\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1620\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1880\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1878\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[1;32m 1879\u001b[0m mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1880\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1881\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1882\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1883\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1884\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1885\u001b[0m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmemory_map\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1886\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1887\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding_errors\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstrict\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1888\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstorage_options\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1889\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1890\u001b[0m 
\u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1891\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n",
+ "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/common.py:873\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 868\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 869\u001b[0m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[1;32m 870\u001b[0m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[1;32m 871\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[1;32m 872\u001b[0m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[0;32m--> 873\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 874\u001b[0m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 875\u001b[0m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 876\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 877\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 878\u001b[0m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 879\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 880\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 881\u001b[0m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[1;32m 882\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n",
+ "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'resources/results/robustness_analysis/weight-20-scores.csv'"
+ ]
}
],
"source": [
- "reg2_scores_layers.style.background_gradient()"
+ "noise_type = 'weight'\n",
+ "reg1_scores, reg2_scores = format_robustness_results(base_dir, noise_type=noise_type)"
]
},
{
@@ -2579,7 +2070,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
diff --git a/scripts/sbatch/calculate_scores.sh b/scripts/sbatch/calculate_scores.sh
index 9533028d9..40b537003 100644
--- a/scripts/sbatch/calculate_scores.sh
+++ b/scripts/sbatch/calculate_scores.sh
@@ -5,7 +5,7 @@
#SBATCH --error=logs/%j.err
#SBATCH --mail-type=END
#SBATCH --mail-user=jalil.nourisa@gmail.com
-#SBATCH --mem=64G
+#SBATCH --mem=250G
#SBATCH --cpus-per-task=20
python src/metrics/script_all.py
diff --git a/scripts/sbatch/robustness_analysis.sh b/scripts/sbatch/robustness_analysis.sh
index 90c529a6d..a6dbf7883 100644
--- a/scripts/sbatch/robustness_analysis.sh
+++ b/scripts/sbatch/robustness_analysis.sh
@@ -5,7 +5,9 @@
#SBATCH --error=logs/%j.err
#SBATCH --mail-type=END
#SBATCH --mail-user=jalil.nourisa@gmail.com
-#SBATCH --mem=64G
-#SBATCH --cpus-per-task=20
+#SBATCH --mem=250G
+#SBATCH --cpus-per-task=20
+# SBATCH --partition=gpu
+# SBATCH --gres=gpu:1
python src/robustness_analysis/script_all.py
diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml
index 8c97254be..89dac561f 100644
--- a/src/api/comp_method.yaml
+++ b/src/api/comp_method.yaml
@@ -47,6 +47,10 @@ functionality:
type: boolean
direction: input
default: True
+ - name: --causal
+ type: boolean
+ direction: input
+ default: True
test_resources:
diff --git a/src/control_methods/pearson_corr/config.vsh.yaml b/src/control_methods/pearson_corr/config.vsh.yaml
index 667b88d1c..3f597eb98 100644
--- a/src/control_methods/pearson_corr/config.vsh.yaml
+++ b/src/control_methods/pearson_corr/config.vsh.yaml
@@ -6,6 +6,11 @@ functionality:
info:
label: pearson_corr
summary: "Baseline based on correlation"
+ arguments:
+ - name: --normalize
+ type: boolean
+ default: True
+ direction: input
resources:
- type: python_script
diff --git a/src/control_methods/pearson_corr/script.py b/src/control_methods/pearson_corr/script.py
index 3020b57de..4868da533 100644
--- a/src/control_methods/pearson_corr/script.py
+++ b/src/control_methods/pearson_corr/script.py
@@ -14,22 +14,35 @@
'normalize': False,
'donor_specific': False,
'temp_dir': 'output/pearson_corr',
- 'causal': True}
+ 'causal': True,
+ 'normalize': True}
## VIASH END
import argparse
parser = argparse.ArgumentParser(description="Process multiomics RNA data.")
parser.add_argument('--multiomics_rna', type=str, help='Path to the multiomics RNA file')
-parser.add_argument('--multiomics_atac', type=str, help='Path to the multiomics RNA file')
parser.add_argument('--prediction', type=str, help='Path to the prediction file')
-parser.add_argument('--resources_dir', type=str, help='Path to the prediction file')
parser.add_argument('--tf_all', type=str, help='Path to the tf_all')
parser.add_argument('--num_workers', type=str, help='Number of cores')
parser.add_argument('--max_n_links', type=str, help='Number of top links to retain')
+parser.add_argument('--causal', action='store_true', help='Enable causal mode')
+parser.add_argument('--normalize', action='store_true')
+
args = parser.parse_args()
+
if args.multiomics_rna:
par['multiomics_rna'] = args.multiomics_rna
+if args.causal:
+ par['causal'] = True
+else:
+ par['causal'] = False
+
+if args.causal:
+ par['normalize'] = True
+else:
+ par['normalize'] = False
+
if args.prediction:
par['prediction'] = args.prediction
if args.tf_all:
@@ -38,9 +51,6 @@
par['num_workers'] = args.num_workers
if args.max_n_links:
par['max_n_links'] = int(args.max_n_links)
-if args.resources_dir:
- meta['resources_dir'] = args.resources_dir
-
os.makedirs(par['temp_dir'], exist_ok=True)
import sys
@@ -59,9 +69,11 @@ def create_corr_net(par):
print(par)
print('Read data')
adata = ad.read_h5ad(par["multiomics_rna"])
- # - lognorm
- sc.pp.normalize_total(adata)
- sc.pp.log1p(adata)
+ if 'normalize' in par:
+ if par['normalize']:
+ # - lognorm
+ sc.pp.normalize_total(adata)
+ sc.pp.log1p(adata)
# - corr
gene_names = adata.var_names.to_numpy()
grn = corr_net(adata.X, gene_names, par)
diff --git a/src/exp_analysis/helper.py b/src/exp_analysis/helper.py
index 4ad8b7a32..8d99d0ae7 100644
--- a/src/exp_analysis/helper.py
+++ b/src/exp_analysis/helper.py
@@ -106,7 +106,7 @@ def cosine_similarity_net(nets_dict, col_name='source', weight_col='weight', fig
return cosine_sim_matrix, fig
-def jaccard_similarity_net(nets_dict, col_name='link', figsize=(4, 4), title='jaccard Similarity', ax=None):
+def jaccard_similarity_net(nets_dict, col_name='link', figsize=(4, 4), title='jaccard Similarity', ax=None, fmt='.02f'):
from itertools import combinations
import seaborn as sns
import matplotlib.pyplot as plt
@@ -138,7 +138,7 @@ def jaccard_similarity_net(nets_dict, col_name='link', figsize=(4, 4), title='ja
fig, ax = plt.subplots(1, 1, figsize=figsize)
else:
fig = None
- sns.heatmap(jaccard_matrix, annot=True, cmap="coolwarm", xticklabels=nets_names, yticklabels=nets_names, ax=ax)
+ sns.heatmap(jaccard_matrix, annot=True, cmap="viridis", xticklabels=nets_names, yticklabels=nets_names, ax=ax, fmt=fmt, cbar=None)
ax.grid(False)
ax.set_title(title)
# Rotate x labels for readability
@@ -146,7 +146,7 @@ def jaccard_similarity_net(nets_dict, col_name='link', figsize=(4, 4), title='ja
return jaccard_matrix, fig
-def plot_cumulative_density(data, label='', ax=None, title=None, label=None, s=1, x_label='Weight', **kwdgs):
+def plot_cumulative_density(data, ax=None, title=None, label=None, s=3, x_label='Weight', **kwdgs):
# Sort the data
sorted_data = np.sort(data)
# Compute the cumulative density values
@@ -159,7 +159,9 @@ def plot_cumulative_density(data, label='', ax=None, title=None, label=None, s=1
ax.set_xlabel(x_label)
ax.set_ylabel('Cumulative Density')
ax.set_title(title)
- ax.grid(True)
+ ax.grid(True, linewidth=0.2, linestyle='--', color='gray')
+ for side in ['right', 'top']:
+ ax.spines[side].set_visible(False)
return fig, ax
class Connectivity:
@@ -224,6 +226,37 @@ def diff_roles(control: pd.DataFrame, sample: pd.DataFrame, critical_change_q_t:
# df_distance['critical_change_ps'] = df_distance['ps_distance'] > df_distance['ps_distance'].quantile(critical_change_q_t)
# df_distance['critical_change_overall'] = df_distance['overall_distance'] > df_distance['overall_distance'].quantile(critical_change_q_t)
return df_distance
+def find_peak_intersection(peaks, peaks_ref):
+ '''Find those peaks_ref intersect with peaks'''
+ # Convert arrays to structured data (chr, start, end)
+ def split_peaks(peak_array):
+ split_data = []
+ for p in peak_array:
+ try:
+ chr_, range_ = p.split(':')
+ start, end = map(int, range_.split('-'))
+ split_data.append((chr_, start, end))
+ except ValueError:
+ continue # Skip malformed peaks
+ return np.array(split_data, dtype=object)
+
+ peaks_struct = split_peaks(peaks)
+ peaks_ref_struct = split_peaks(peaks_ref)
+
+ # Optimize with NumPy broadcasting for faster intersection check
+ intersecting_peaks_ref = []
+
+ chr_peaks = peaks_struct[:, 0]
+ start_peaks = peaks_struct[:, 1].astype(int)
+ end_peaks = peaks_struct[:, 2].astype(int)
+
+ for chr_ref, start_ref, end_ref in peaks_ref_struct:
+ # Vectorized filtering for chromosome and overlap conditions
+ mask = (chr_peaks == chr_ref) & (start_peaks <= end_ref) & (end_peaks >= start_ref)
+ if np.any(mask):
+ intersecting_peaks_ref.append(f"{chr_ref}:{start_ref}-{end_ref}")
+
+ return intersecting_peaks_ref
class Exp_analysis:
'''
This class provides functions for explanatory analysis of GRNs
@@ -232,8 +265,10 @@ def __init__(self, net, peak_gene_net=None):
self.net = net
# self.net.weight = minmax_scale(self.net.weight)
self.net['link'] = self.net['source'].astype(str) + '_' + self.net['target'].astype(str)
-
self.peak_gene_net = peak_gene_net
+ if self.peak_gene_net is not None:
+ if 'peak' in peak_gene_net.columns:
+ peak_gene_net.rename(columns={'peak': 'source'}, inplace=True)
self.tfs = net.source.unique()
self.targets = net.target.unique()
# check duplicates
@@ -251,7 +286,7 @@ def __init__(self, net, peak_gene_net=None):
def plot_centrality_barh(df, title='',ax=None, xlabel='Degree', ylabel='Gene', colors=None):
if ax==None:
fig, ax = plt.subplots(figsize=(10, 6))
- df['degree'].plot(kind='barh', color='skyblue', ax=ax) # Pass ax to the plot method
+ df.plot(kind='barh', color='skyblue', ax=ax) # Pass ax to the plot method
ax.set_title(title)
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
@@ -293,15 +328,33 @@ def subset_quantile(df, col_name='weight', top_q=0.95, top_n=None, ascending=Fal
else:
df = df.sort_values(by=col_name, ascending=ascending, key=abs)[:top_n]
return df
+
+
def annotate_peaks(self, annotation_df) -> dict[str, float]:
'''Annotate peaks with associated regions on genome.
'''
if self.peak_gene_net is None:
print('Peak gene net is not given. Peak annoation is skipped.')
return
- peaks = self.peak_gene_net.peak.unique()
+ # print(self.peak_gene_net)
+ peaks = self.peak_gene_net.source.unique()
peaks = self.format_peak(peaks)
- annotation_df = annotation_df[annotation_df.peak.isin(peaks)]
+
+ annotation_peaks = annotation_df.peak.unique()
+
+ flag = False
+ for peak in peaks:
+ if peak not in annotation_peaks:
+ flag = True
+
+
+ if flag:
+ print('Not all peaks in the net is among the annotated ones. Finding the overlap')
+ peaks = find_peak_intersection(peaks, annotation_df.peak.unique())
+ annotation_df = annotation_df[annotation_df.peak.isin(peaks)]
+ else:
+ annotation_df = annotation_df[annotation_df.peak.isin(peaks)]
+
value_counts = annotation_df.annotation.value_counts()
sum_values = value_counts.sum()
value_ratio = ((value_counts/sum_values)*100).round(1)
@@ -343,7 +396,7 @@ def calculate_feature(net, feature='active_sum_weight'):
return df
-def plot_interactions(interaction_df: pd.DataFrame, min_subset_size=None, min_degree=None, color_map=None) -> plt.Figure:
+def plot_interactions(interaction_df: pd.DataFrame, min_subset_size=None, min_degree=None, color_map=None, sort_by='degree') -> plt.Figure:
"""Upset plot of interactions
Args:
@@ -359,7 +412,7 @@ def plot_interactions(interaction_df: pd.DataFrame, min_subset_size=None, min_de
out_dict = upsetplot.plot(upsetplot.from_indicators(indicators=lambda a: a==True, data=interaction_df), fig=fig,
show_counts=True,
show_percentages = '{:.0%}',
- # sort_by='cardinality',
+ sort_by=sort_by, #'cardinality'
# min_subset_size =".1%", # min interaction to show
min_subset_size = min_subset_size, # min interaction to show
min_degree=min_degree,
diff --git a/src/helper.py b/src/helper.py
index f21a3c6e7..11f2ce135 100644
--- a/src/helper.py
+++ b/src/helper.py
@@ -46,7 +46,6 @@ def calculate_scores():
"sbatch",
"scripts/sbatch/calculate_scores.sh"
]
-
# Print command to verify
subprocess.run(command)
@@ -84,46 +83,56 @@ def run_grn_seqera():
def run_grn_inference():
# par = {
# 'methods': ['portia'],
- # 'models_dir': 'resources/grn_models/',
- # 'multiomics_rna': 'resources/grn-benchmark/multiomics_rna.h5ad',
- # 'multiomics_atac': 'resources/grn-benchmark/multiomics_atac.h5ad',
+ # 'models_dir': 'resources/grn_models/mccalla/han',
+ # 'multiomics_rna': 'resources/grn-benchmark/mccalla/inference/han.h5ad',
# 'num_workers': 20,
- # 'mem': "120GB",
- # 'time': "24:00:00"
- # }
+ # 'mem': "250GB",
+ # 'time': "48:00:00",
+ # 'max_n_links': 100000,
+ # 'causal': False,
+ # 'normalize': False
+ # }
par = {
- 'methods': ["pearson_corr", "positive_control", "negative_control"],
+ 'methods': ['scenicplus'],
'models_dir': 'resources/grn_models/',
'multiomics_rna': 'resources/grn-benchmark/multiomics_rna.h5ad',
- 'multiomics_atac': 'resources/grn-benchmark/multiomics_atac.h5ad',
+ 'multiomics_atac': 'resources/grn-benchmark/multiomics_atac.h5ad',
'num_workers': 20,
- 'mem': "120GB",
- 'time': "24:00:00"
+ 'mem': "400GB",
+ 'time': "48:00:00",
+ 'causal': False,
+ 'normalize': True,
+ 'max_n_links': 100000,
}
for method in par['methods']:
+ print(method)
par['prediction'] = f"{par['models_dir']}/{method}.csv"
# Method arguments
method_args = (f"--multiomics_rna {par['multiomics_rna']} "
- f"--multiomics_atac {par['multiomics_atac']} "
f"--prediction {par['prediction']} "
f"--num_workers {par['num_workers']} "
- f"--resources_dir src/utils")
+ f"--max_n_links {par['max_n_links']} ")
+ if par['causal']:
+ method_args += f"--causal "
# Determine the command based on the method
if method in ["pearson_corr", "positive_control", "negative_control"]:
+ if par['normalize']:
+ method_args += f"--normalize "
command = f"python src/control_methods/{method}/script.py {method_args}"
elif method == "celloracle":
+ method_args += f"--multiomics_atac {par['multiomics_atac']} "
command = (f"/home/jnourisa/miniconda3/envs/celloracle/bin/python "
f"src/methods/multi_omics/celloracle/script.py {method_args}")
elif method in ["grnboost2", "scenic", "genie3"]:
command = f"singularity exec ../../images/scenic python src/methods/single_omics/{method}/script.py {method_args}"
elif method == 'scglue':
+ method_args += f"--multiomics_atac {par['multiomics_atac']} "
command = f"singularity exec ../../images/scglue python src/methods/multi_omics/{method}/script.py {method_args}"
- elif method == 'ppcor':
- command = f"singularity exec ../../images/ppcor Rscript src/methods/single_omics/{method}/script.R {method_args}"
elif method == 'scenicplus':
+ method_args += f"--multiomics_atac {par['multiomics_atac']} "
command = f"singularity exec ../../images/scenicplus python src/methods/multi_omics/{method}/script.py {method_args}"
else:
command = f"singularity exec ../../images/{method} python src/methods/single_omics/{method}/script.py {method_args}"
@@ -139,11 +148,12 @@ def run_grn_inference():
# Add GPU partition if method is 'scglue'
if method == 'scglue':
full_tag += ["--partition=gpu", "--gres=gpu:1"]
-
+ if True:
+ full_tag += ["--partition=gpu", "--gres=gpu:1"]
# Run sbatch command
try:
- # result = subprocess.run(['sbatch'] + full_tag + ['scripts/sbatch/grn_inference.sh', command], check=True, capture_output=True, text=True)
- result = subprocess.run(['bash'] + ['scripts/sbatch/grn_inference.sh', command], check=True, capture_output=True, text=True)
+ result = subprocess.run(['sbatch'] + full_tag + ['scripts/sbatch/grn_inference.sh', command], check=True, capture_output=True, text=True)
+ # result = subprocess.run(['bash'] + ['scripts/sbatch/grn_inference.sh', command], check=True, capture_output=True, text=True)
print(f"Job {method} submitted successfully.")
print(result.stdout) # Print the standard output
@@ -201,6 +211,31 @@ def marco_data():
# print(f"{cell_type}-{GT}. adata shape: {adata.shape}, GT size: {GT_df.shape}, Gene overlap: {gene_overlap}")
# command = f"viash run src/metrics/regression_1/config.vsh.yaml -- --perturbation_data resources_local/mccalla_extended/{cell_type}.h5ad --prediction resources_local/mccalla_extended/{cell_type}_{GT}.csv --layer norm --subsample {subsample} --apply_tf false --tf_all resources/prior/tf_all.csv --max_n_links -1 --verbose 1 --score output/{cell_type}_{GT}.h5ad"
# subprocess.run(command, shell=True, check=True)
+
+
+ all_gene_names = pd.read_csv('resources/prior/genome_annotation.tsv', sep='\t').Gene.unique().astype(str)
+
+ # adata_names = ['han', 'jackson', 'zhao', 'shalek']
+ # post_fixes = ['chipunion','KDunion', 'chipunion_KDUnion_intersect']
+ # for name in adata_names:
+ # print('------- ', name)
+ # adata = ad.read_h5ad(f'resources/grn-benchmark/mccalla/inference/{name}.h5ad')
+ # adata.var_names = adata.var_names.str.upper().astype(str)
+
+ # genes = adata.var_names
+ # print(len(genes),genes.isin(all_gene_names).sum())
+ # print(np.setdiff1d(genes, all_gene_names)[0:5])
+ # adata.write_h5ad(f'resources/grn-benchmark/mccalla/inference/{name}.h5ad')
+
+ # for post_fix in post_fixes:
+ # GT = pd.read_csv(f'resources/grn-benchmark/mccalla/evaluation/{name}_{post_fix}.csv')
+ # GT.source = GT.source.str.upper()
+ # GT.target = GT.target.str.upper()
+ # GT.to_csv(f'resources/grn-benchmark/mccalla/evaluation/{name}_{post_fix}.csv')
+ # tf_genes =set(GT.source) | set(GT.target)
+ # tf_genes = [name.upper() for name in tf_genes]
+ # # print(tf_genes)
+ # print('-- ', post_fix, len(tf_genes), np.intersect1d(list(tf_genes), genes).shape)
pass
def extract_data(data, reg='reg1', dataset_id='scgen_pearson'):
@@ -300,22 +335,39 @@ def elapsed_to_hours(elapsed_str):
time = time[0]
h, m, s = map(int, time.split(':'))
return day*24 + h + m / 60 + s / 3600
+ def reformat_data(df_local):
+ # Remove 'K' and convert to integers
+ df_local['MaxRSS'] = df_local['MaxRSS'].str.replace('K', '').astype(int)
+ df_local['MaxVMSize'] = df_local['MaxVMSize'].str.replace('K', '').astype(int)
+ df_local['Elapsed'] = df_local['Elapsed'].apply(lambda x: (elapsed_to_hours(x)))
+
+ # Convert MaxRSS and MaxVMSize from KB to GB
+ df_local['MaxRSS'] = df_local['MaxRSS'] / (1024 ** 2) # Convert KB to GB
+ df_local['MaxVMSize'] = df_local['MaxVMSize'] / (1024 ** 2) # Convert KB to GB
+ return df_local
for i, (name, job_id) in enumerate(job_ids_dict.items()):
- df = get_sacct_data(job_id)
+ if type(job_id)==list:
+
+ for i_sub, job_id_ in enumerate(job_id):
+ df_ = get_sacct_data(job_id_)
+ df_ = reformat_data(df_)
+ if i_sub == 0:
+ df = df_
+ else:
+ concat_df = pd.concat([df, df_], axis=0)
+ df['MaxVMSize'] = concat_df['MaxVMSize'].max()
+ df['MaxRSS'] = concat_df['MaxRSS'].max()
+ df['Elapsed'] = concat_df['Elapsed'].sum()
+ else:
+ df = get_sacct_data(job_id)
+ df = reformat_data(df)
df.index = [name]
if i==0:
df_local = df
else:
df_local = pd.concat([df_local, df], axis=0)
- # Remove 'K' and convert to integers
- df_local['MaxRSS'] = df_local['MaxRSS'].str.replace('K', '').astype(int)
- df_local['MaxVMSize'] = df_local['MaxVMSize'].str.replace('K', '').astype(int)
- df_local['Elapsed'] = df_local['Elapsed'].apply(lambda x: (elapsed_to_hours(x)))
-
- # Convert MaxRSS and MaxVMSize from KB to GB
- df_local['MaxRSS'] = df_local['MaxRSS'] / (1024 ** 2) # Convert KB to GB
- df_local['MaxVMSize'] = df_local['MaxVMSize'] / (1024 ** 2) # Convert KB to GB
+
return df_local
diff --git a/src/methods/multi_omics/celloracle/config.vsh.yaml b/src/methods/multi_omics/celloracle/config.vsh.yaml
index bc976fe5f..350fbf2e1 100644
--- a/src/methods/multi_omics/celloracle/config.vsh.yaml
+++ b/src/methods/multi_omics/celloracle/config.vsh.yaml
@@ -14,6 +14,7 @@ functionality:
type: file
direction: output
default: output/celloracle/base_grn.csv
+
# - name: --links
# type: file
# direction: output
diff --git a/src/methods/multi_omics/celloracle/main.py b/src/methods/multi_omics/celloracle/main.py
index a3307c288..320a418b9 100644
--- a/src/methods/multi_omics/celloracle/main.py
+++ b/src/methods/multi_omics/celloracle/main.py
@@ -14,8 +14,7 @@ def base_grn(par) -> None:
print("Reading atac data")
multiomics_atac = ad.read_h5ad(par["multiomics_atac"])
- # genomes_dir = par['temp_dir']
- genomes_dir = None
+
print("Format peak data")
peaks = multiomics_atac.var_names.to_numpy()
peaks = [peak.replace(':','_').replace("-",'_') for peak in peaks]
@@ -23,18 +22,22 @@ def base_grn(par) -> None:
tss_annotated['peak_id'] = tss_annotated['chr'].astype(str)+"_"+tss_annotated['start'].astype(str)+"_"+tss_annotated['end'].astype(str)
peak_gene = tss_annotated
- print("Install ref genome")
- genomepy.install_genome(name="hg38", provider="UCSC", genomes_dir=genomes_dir)
+ try:
+ print("Install ref genome")
+ genomepy.install_genome(name="hg38", provider="UCSC", genomes_dir=None)
+ except:
+ print("Couldn't install genome. Will look for the default location")
+
ref_genome = "hg38"
genome_installation = ma.is_genome_installed(ref_genome=ref_genome,
- genomes_dir=genomes_dir)
+ genomes_dir=None)
print(ref_genome, "installation: ", genome_installation)
print("Instantiate TFinfo object")
tfi = ma.TFinfo(peak_data_frame=peak_gene,
- ref_genome="hg38",
- genomes_dir=genomes_dir)
+ ref_genome=ref_genome,
+ genomes_dir=None)
print("Motif scan")
tfi.scan(fpr=0.05,
motifs=None, # If you enter None, default motifs will be loaded.
diff --git a/src/methods/multi_omics/celloracle/script.py b/src/methods/multi_omics/celloracle/script.py
index da341a7df..c7dfc33f4 100644
--- a/src/methods/multi_omics/celloracle/script.py
+++ b/src/methods/multi_omics/celloracle/script.py
@@ -6,13 +6,12 @@
## VIASH START
par = {
- "multiomics_rna": "resources_test/grn-benchmark/multiomics_rna_d0_hvg.h5ad",
- "multiomics_atac": "resources_test/grn-benchmark/multiomics_atac.h5ad",
+ "multiomics_rna": "resources/grn-benchmark/multiomics_rna.h5ad",
+ "multiomics_atac": "resources/grn-benchmark/multiomics_atac.h5ad",
"base_grn": 'output/celloracle/base_grn.csv',
"temp_dir": 'output/celloracle/',
"num_workers": 10,
- "prediction": "output/celloracle_test.h5ad",
-}
+ "prediction": "output/celloracle.h5ad"}
## VIASH END
parser = argparse.ArgumentParser(description="Process multiomics RNA data.")
@@ -21,7 +20,8 @@
parser.add_argument('--prediction', type=str, help='Path to the prediction file')
parser.add_argument('--resources_dir', type=str, help='Path to the prediction file')
parser.add_argument('--tf_all', type=str, help='Path to the tf_all')
-parser.add_argument('--num_workers', type=str, help='Number of cores')
+parser.add_argument('--num_workers', type=int, help='Number of cores')
+parser.add_argument('--max_n_links', type=int)
args = parser.parse_args()
if args.multiomics_rna:
@@ -34,38 +34,25 @@
par['tf_all'] = args.tf_all
if args.num_workers:
par['num_workers'] = args.num_workers
+if args.max_n_links:
+ par['max_n_links'] = args.max_n_links
if args.resources_dir:
+ meta = {}
meta['resources_dir'] = args.resources_dir
-
-
-par['links'] = f"{par['temp_dir']}/links.celloracle.links"
-
-
-import argparse
-parser = argparse.ArgumentParser(description="Process multiomics RNA data.")
-parser.add_argument('--multiomics_rna', type=str, help='Path to the multiomics RNA file')
-parser.add_argument('--prediction', type=str, help='Path to the prediction file')
-parser.add_argument('--resources_dir', type=str, help='Path to the prediction file')
-parser.add_argument('--tf_all', type=str, help='Path to the tf_all')
-parser.add_argument('--num_workers', type=str, help='Number of cores')
-args = parser.parse_args()
-
-if args.multiomics_rna:
- par['multiomics_rna'] = args.multiomics_rna
-if args.prediction:
- par['prediction'] = args.prediction
-if args.tf_all:
- par['tf_all'] = args.tf_all
-if args.num_workers:
- par['num_workers'] = args.num_workers
-
-if args.resources_dir:
- meta['resources_dir'] = args.resources_dir
-
-sys.path.append(meta["resources_dir"])
+try:
+ meta['resources_dir'] = args.resources_dir
+except:
+ pass
from main import main
os.makedirs(par['temp_dir'], exist_ok=True)
+
+
+if 'base_grn' not in par:
+ par['base_grn'] = f"{par['temp_dir']}/base_grn.csv"
+if 'links' not in par:
+ par['links'] = f"{par['temp_dir']}/links.celloracle.links"
+
prediction = main(par)
print('Write output to file', flush=True)
diff --git a/src/methods/multi_omics/scenicplus/main.py b/src/methods/multi_omics/scenicplus/main.py
index e5d4153cd..f9b568a36 100644
--- a/src/methods/multi_omics/scenicplus/main.py
+++ b/src/methods/multi_omics/scenicplus/main.py
@@ -333,8 +333,8 @@ def run_cistopic(par):
# LDA-based topic modeling
print('Run LDA models', flush=True)
- # n_topics = [2, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
- n_topics = [40] #TODO: fix this
+ n_topics = [2, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
+ # n_topics = [40] #TODO: fix this
if os.path.exists(par['MALLET_PATH']):
models = run_cgs_models_mallet(
cistopic_obj,
@@ -703,17 +703,17 @@ def preprocess_rna(par):
def snakemake_pipeline(par):
import os
snakemake_dir = os.path.join(par['temp_dir'], 'scplus_pipeline', 'Snakemake')
- # if os.path.exists(snakemake_dir):
- # import shutil
- # shutil.rmtree(snakemake_dir)
+ if os.path.exists(snakemake_dir):
+ import shutil
+ shutil.rmtree(snakemake_dir)
os.makedirs(os.path.join(par['temp_dir'], 'scplus_pipeline'), exist_ok=True)
os.makedirs(os.path.join(par['temp_dir'], 'scplus_pipeline', 'temp'), exist_ok=True)
pipeline_dir = os.path.join(par['temp_dir'], 'scplus_pipeline')
- if not os.path.exists(pipeline_dir):
- subprocess.run(['scenicplus', 'init_snakemake', '--out_dir', pipeline_dir])
- print('snake make initialized', flush=True)
+ # if not os.path.exists(pipeline_dir):
+ subprocess.run(['scenicplus', 'init_snakemake', '--out_dir', pipeline_dir])
+ print('snake make initialized', flush=True)
# Load pipeline settings
with open(os.path.join(snakemake_dir, 'config', 'config.yaml'), 'r') as f:
diff --git a/src/methods/multi_omics/scenicplus/script.py b/src/methods/multi_omics/scenicplus/script.py
index 876ecbe7b..d9478ddd7 100644
--- a/src/methods/multi_omics/scenicplus/script.py
+++ b/src/methods/multi_omics/scenicplus/script.py
@@ -23,6 +23,7 @@
parser.add_argument('--resources_dir', type=str, help='Path to the prediction file')
parser.add_argument('--tf_all', type=str, help='Path to the tf_all')
parser.add_argument('--num_workers', type=str, help='Number of cores')
+parser.add_argument('--max_n_links', type=int)
args = parser.parse_args()
if args.multiomics_rna:
@@ -33,6 +34,8 @@
par['prediction'] = args.prediction
if args.tf_all:
par['tf_all'] = args.tf_all
+if args.max_n_links:
+ par['max_n_links'] = args.max_n_links
if args.num_workers:
par['num_workers'] = args.num_workers
@@ -42,8 +45,10 @@
meta['resources_dir'] = args.resources_dir
par['num_workers'] = int(par['num_workers'])
print(par)
-
-sys.path.append(meta["resources_dir"])
+try:
+ sys.path.append(meta["resources_dir"])
+except:
+ pass
from main import *
@@ -69,22 +74,22 @@ def main(par):
par['MALLET_PATH'] = os.path.join(par['temp_dir'], 'Mallet-202108', 'bin', 'mallet')
os.makedirs(par['atac_dir'], exist_ok=True)
- print('------- download_databases -------')
- download_databases(par)
- print_memory_usage()
- print('------- process_peak -------')
- process_peak(par)
- print_memory_usage()
- print('------- run_cistopic -------')
- run_cistopic(par)
- print_memory_usage()
- print('------- process_topics -------')
- process_topics(par)
- print_memory_usage()
- print('------- preprocess_rna -------')
- preprocess_rna(par)
- print_memory_usage()
- print('------- snakemake_pipeline -------')
+ # print('------- download_databases -------')
+ # download_databases(par)
+ # print_memory_usage()
+ # print('------- process_peak -------')
+ # process_peak(par)
+ # print_memory_usage()
+ # print('------- run_cistopic -------')
+ # run_cistopic(par)
+ # print_memory_usage()
+ # print('------- process_topics -------')
+ # process_topics(par)
+ # print_memory_usage()
+ # print('------- preprocess_rna -------')
+ # preprocess_rna(par)
+ # print_memory_usage()
+ # print('------- snakemake_pipeline -------')
snakemake_pipeline(par)
print_memory_usage()
print('------- post_process -------')
diff --git a/src/methods/multi_omics/scglue/main.py b/src/methods/multi_omics/scglue/main.py
index 598788822..5e9d8a7b2 100644
--- a/src/methods/multi_omics/scglue/main.py
+++ b/src/methods/multi_omics/scglue/main.py
@@ -306,20 +306,20 @@ def main(par):
from util import process_links
# Load scRNA-seq data
- # os.makedirs(par['temp_dir'], exist_ok=True)
- # print('----- download_annotation ---- ', flush=True)
- # download_annotation(par)
- # print('----- download_motifs ---- ', flush=True)
- # download_motifs(par)
- # print('----- preprocess ---- ', flush=True)
- # preprocess(par)
- # print('----- training ---- ', flush=True)
- # training(par)
- # print('----- create_prior ---- ', flush=True)
- # create_prior(par)
- # print('----- pyscenic_grn ---- ', flush=True)
- # pyscenic_grn(par)
- # print('----- prune_grn ---- ', flush=True)
+ os.makedirs(par['temp_dir'], exist_ok=True)
+ print('----- download_annotation ---- ', flush=True)
+ download_annotation(par)
+ print('----- download_motifs ---- ', flush=True)
+ download_motifs(par)
+ print('----- preprocess ---- ', flush=True)
+ preprocess(par)
+ print('----- training ---- ', flush=True)
+ training(par)
+ print('----- create_prior ---- ', flush=True)
+ create_prior(par)
+ print('----- pyscenic_grn ---- ', flush=True)
+ pyscenic_grn(par)
+ print('----- prune_grn ---- ', flush=True)
prune_grn(par)
print('Curate predictions', flush=True)
pruned_grn = pd.read_csv(
diff --git a/src/methods/multi_omics/scglue/script.py b/src/methods/multi_omics/scglue/script.py
index 3326ea231..61bd1b722 100644
--- a/src/methods/multi_omics/scglue/script.py
+++ b/src/methods/multi_omics/scglue/script.py
@@ -42,8 +42,8 @@
if args.num_workers:
par['num_workers'] = args.num_workers
-if args.resources_dir:
- meta['resources_dir'] = args.resources_dir
+# if args.resources_dir:
+# meta['resources_dir'] = args.resources_dir
# get gene annotation
par['annotation_file'] = f"{par['temp_dir']}/gencode.v45.annotation.gtf.gz"
diff --git a/src/methods/single_omics/grnboost2/script.py b/src/methods/single_omics/grnboost2/script.py
index b7cc4ce47..98a06b9ad 100644
--- a/src/methods/single_omics/grnboost2/script.py
+++ b/src/methods/single_omics/grnboost2/script.py
@@ -84,18 +84,19 @@ def infer_grn(X, par):
return network
# par['cell_type_specific'] = False
-if par['cell_type_specific']:
- groups = adata_rna.obs.cell_type
- i = 0
- for group in tqdm(np.unique(groups), desc="Processing groups"):
- X_sub = X[groups == group, :]
- net = infer_grn(X_sub, par)
- net['cell_type'] = group
- if i==0:
- grn = net
- else:
- grn = pd.concat([grn, net], axis=0).reset_index(drop=True)
- i += 1
+if 'cell_type_specific' in par:
+ if par['cell_type_specific']:
+ groups = adata_rna.obs.cell_type
+ i = 0
+ for group in tqdm(np.unique(groups), desc="Processing groups"):
+ X_sub = X[groups == group, :]
+ net = infer_grn(X_sub, par)
+ net['cell_type'] = group
+ if i==0:
+ grn = net
+ else:
+ grn = pd.concat([grn, net], axis=0).reset_index(drop=True)
+ i += 1
else:
grn = infer_grn(X, par)
diff --git a/src/methods/single_omics/portia/script.py b/src/methods/single_omics/portia/script.py
index 26ffc6a3f..6fc4121b3 100644
--- a/src/methods/single_omics/portia/script.py
+++ b/src/methods/single_omics/portia/script.py
@@ -26,30 +26,44 @@
import argparse
parser = argparse.ArgumentParser(description="Process multiomics RNA data.")
parser.add_argument('--multiomics_rna', type=str, help='Path to the multiomics RNA file')
-parser.add_argument('--multiomics_atac', type=str, help='Path to the multiomics atac file')
parser.add_argument('--prediction', type=str, help='Path to the prediction file')
-parser.add_argument('--resources_dir', type=str, help='Path to the prediction file')
parser.add_argument('--tf_all', type=str, help='Path to the tf_all')
parser.add_argument('--num_workers', type=str, help='Number of cores')
+parser.add_argument('--max_n_links', type=str, help='Number of top links to retain')
+parser.add_argument('--causal', action='store_true', help='Enable causal mode')
+parser.add_argument('--normalize', action='store_true')
+
args = parser.parse_args()
if args.multiomics_rna:
par['multiomics_rna'] = args.multiomics_rna
+if args.causal:
+ par['causal'] = True
+else:
+ par['causal'] = False
+
+if args.causal:
+ par['normalize'] = True
+else:
+ par['normalize'] = False
+
if args.prediction:
par['prediction'] = args.prediction
if args.tf_all:
par['tf_all'] = args.tf_all
if args.num_workers:
par['num_workers'] = args.num_workers
-
-if args.resources_dir:
- meta['resources_dir'] = args.resources_dir
+if args.max_n_links:
+ par['max_n_links'] = int(args.max_n_links)
+
+os.makedirs(par['temp_dir'], exist_ok=True)
+import sys
try:
sys.path.append(meta["resources_dir"])
except:
- meta= {
- "resources_dir": 'src/utils/'
+ meta = {
+ 'resources_dir': 'src/utils'
}
sys.path.append(meta["resources_dir"])
from util import process_links
diff --git a/src/metrics/regression_2/main.py b/src/metrics/regression_2/main.py
index 4f35a29f8..3ef2769b8 100644
--- a/src/metrics/regression_2/main.py
+++ b/src/metrics/regression_2/main.py
@@ -341,11 +341,11 @@ def main(par: Dict[str, Any]) -> pd.DataFrame:
verbose_print(par['verbose'], f'Static approach (theta=0.5):', 3)
score_static_median = static_approach(net_matrix, n_features_theta_median, X, groups, gene_names, tf_names, par['reg_type'], n_jobs=par['num_workers'], n_features_dict=n_features_dict, clip_scores=clip_scores)
# print(f'Static approach (theta=1):', flush=True)
- # score_static_max = static_approach(net_matrix, n_features_theta_max, X, groups, gene_names, tf_names, par['reg_type'], n_jobs=par['num_workers'])
+ # score_static_max = static_approach(net_matrix, n_features_theta_max, X, groups, gene_names, tf_names, par['reg_type'], n_jobs=par['num_workers'], n_features_dict=n_features_dict, clip_scores=clip_scores)
results = {
'static-theta-0.0': [float(score_static_min)],
- 'static-theta-0.5': [float(score_static_median)]
+ 'static-theta-0.5': [float(score_static_median)],
# 'static-theta-1.0': [float(score_static_max)],
}
@@ -363,12 +363,5 @@ def main(par: Dict[str, Any]) -> pd.DataFrame:
df_results_store.append(df_results)
df_results_concat = pd.concat(df_results_store, axis=0)
- df_results_concat.index.name = 'donor_id'
- print(df_results_concat.reset_index())
- df_results_mean = df_results_concat.reset_index().groupby('donor_id').mean()
- print(df_results_mean)
-
-
-
-
+ df_results_mean = df_results_concat.mean(axis=0).to_frame().T
return df_results_mean
diff --git a/src/metrics/script_all.py b/src/metrics/script_all.py
index 170efb8c7..4472139a0 100644
--- a/src/metrics/script_all.py
+++ b/src/metrics/script_all.py
@@ -59,30 +59,32 @@
os.makedirs(par['scores_dir'], exist_ok=True)
-for max_n_links in [50000, 10000]:
- par['max_n_links'] = max_n_links
- for apply_skeleton in [False, True]:
- par['apply_skeleton'] = apply_skeleton
- for layer in par['layers']:
- par['layer'] = layer
- i = 0
- for method in par['methods']:
- print(method)
- par['prediction'] = f"{par['models_dir']}/{method}.csv"
- if not os.path.exists(par['prediction']):
- print(f"{par['prediction']} doesnt exist. Skipped.")
- continue
- from regression_1.main import main
- reg1 = main(par)
- from regression_2.main import main
- reg2 = main(par)
- score = pd.concat([reg1, reg2], axis=1)
- score.index = [method]
- if i==0:
- df_all = score
- else:
- df_all = pd.concat([df_all, score])
- df_all.to_csv(f"{par['scores_dir']}/{max_n_links}-{apply_skeleton}-{layer}-{par['reg_type']}.csv")
- print(df_all)
- i+=1
+for binarize in [False, True]:
+ par['binarize'] = binarize
+ for max_n_links in [50000, 10000]:
+ par['max_n_links'] = max_n_links
+ for apply_skeleton in [True, False]:
+ par['apply_skeleton'] = apply_skeleton
+ for layer in par['layers']:
+ par['layer'] = layer
+ i = 0
+ for method in par['methods']:
+ print(method)
+ par['prediction'] = f"{par['models_dir']}/{method}.csv"
+ if not os.path.exists(par['prediction']):
+ print(f"{par['prediction']} doesn't exist. Skipped.")
+ continue
+ from regression_1.main import main
+ reg1 = main(par)
+ from regression_2.main import main
+ reg2 = main(par)
+ score = pd.concat([reg1, reg2], axis=1)
+ score.index = [method]
+ if i==0:
+ df_all = score
+ else:
+ df_all = pd.concat([df_all, score])
+ df_all.to_csv(f"{par['scores_dir']}/{max_n_links}-skeleton_{apply_skeleton}-binarize_{binarize}_{layer}-{par['reg_type']}.csv")
+ print(df_all)
+ i+=1
diff --git a/src/metrics/skeleton/script.py b/src/metrics/skeleton/script.py
index b2a8c0570..146dd5dc7 100644
--- a/src/metrics/skeleton/script.py
+++ b/src/metrics/skeleton/script.py
@@ -11,26 +11,13 @@
from ast import literal_eval
import requests
import torch
+
+
def preprocess(par):
print('Reading input files', flush=True)
rna = ad.read_h5ad(par['multiomics_rna'])
atac = ad.read_h5ad(par['multiomics_atac'])
-
- rna.layers["counts"] = rna.X.copy()
- sc.pp.highly_variable_genes(rna, n_top_genes=2000, flavor="seurat_v3")
- sc.pp.normalize_total(rna)
- sc.pp.log1p(rna)
- sc.pp.scale(rna)
- sc.tl.pca(rna, n_comps=100, svd_solver="auto")
- sc.pp.neighbors(rna, metric="cosine")
- sc.tl.umap(rna)
- print('step 1 completed')
-
- scglue.data.lsi(atac, n_components=100, n_iter=15)
- sc.pp.neighbors(atac, use_rep="X_lsi", metric="cosine")
- sc.tl.umap(atac)
- print('step 2 completed')
-
+
scglue.data.get_gene_annotation(
rna, gtf=par['annotation_file'],
gtf_by="gene_name"
@@ -71,140 +58,74 @@ def preprocess(par):
atac.write(f"{par['temp_dir']}/atac.h5ad")
nx.write_graphml(guidance, f"{par['temp_dir']}/guidance.graphml.gz")
-def training(par):
- os.makedirs(f"{par['temp_dir']}/glue", exist_ok=True)
- rna = ad.read_h5ad(f"{par['temp_dir']}/rna.h5ad")
- atac = ad.read_h5ad(f"{par['temp_dir']}/atac.h5ad")
- guidance = nx.read_graphml(f"{par['temp_dir']}/guidance.graphml.gz")
- scglue.models.configure_dataset(
- rna, "NB", use_highly_variable=True,
- use_layer="counts", use_rep="X_pca", use_batch='donor_id', use_cell_type='cell_type'
- )
- scglue.models.configure_dataset(
- atac, "NB", use_highly_variable=True,
- use_rep="X_lsi", use_batch='donor_id', use_cell_type='cell_type'
- )
- if False:
- guidance_hvf = guidance.subgraph(chain(
- rna.var.query("highly_variable").index,
- atac.var.query("highly_variable").index
- )).copy()
-
- glue = scglue.models.fit_SCGLUE(
- {"rna": rna, "atac": atac}, guidance,
- fit_kws={"directory": f"{par['temp_dir']}/glue"}
- )
-
- glue.save(f"{par['temp_dir']}/glue.dill")
-
- if True: # consistency score
- dx = scglue.models.integration_consistency(
- glue, {"rna": rna, "atac": atac}, guidance
- )
- dx.to_csv(f"{par['temp_dir']}/consistency_scores.csv")
-
- rna.obsm["X_glue"] = glue.encode_data("rna", rna)
- atac.obsm["X_glue"] = glue.encode_data("atac", atac)
- feature_embeddings = glue.encode_graph(guidance)
- feature_embeddings = pd.DataFrame(feature_embeddings, index=glue.vertices)
- rna.varm["X_glue"] = feature_embeddings.reindex(rna.var_names).to_numpy()
- atac.varm["X_glue"] = feature_embeddings.reindex(atac.var_names).to_numpy()
-
- rna.write(f"{par['rna-emb']}", compression="gzip")
- atac.write(f"{par['atac-emb']}", compression="gzip")
- nx.write_graphml(guidance, f"{par['guidance.graphml']}")
-def peak_tf_gene_connections(par):
- ''' Infers gene2peak connections
- '''
- print('reload the data')
- rna = ad.read_h5ad(f"{par['temp_dir']}/rna-emb.h5ad")
- atac = ad.read_h5ad(f"{par['temp_dir']}/atac-emb.h5ad")
- guidance = nx.read_graphml(f"{par['temp_dir']}/guidance.graphml.gz")
-
- rna.var["name"] = rna.var_names
- atac.var["name"] = atac.var_names
-
- genes = rna.var.index
- peaks = atac.var.index
- features = pd.Index(np.concatenate([rna.var_names, atac.var_names]))
- feature_embeddings = np.concatenate([rna.varm["X_glue"], atac.varm["X_glue"]])
- print('Get the skeleton')
-
- skeleton = guidance.edge_subgraph(
+ guidance = guidance.edge_subgraph(
e for e, attr in dict(guidance.edges).items()
if attr["type"] == "fwd"
).copy()
- print('reginf')
- reginf = scglue.genomics.regulatory_inference(
- features, feature_embeddings,
- skeleton=skeleton, random_state=0
- )
- print('gene2peak')
- gene2peak = reginf.edge_subgraph(
- e for e, attr in dict(reginf.edges).items()
- if attr["qval"] < 0.1
- )
+ sources = []
+ targets = []
+ for e, attr in dict(guidance.edges).items():
+ sources.append(e[1])
+ targets.append(e[0])
+ df = pd.DataFrame({'source': sources, 'target':targets})
+ df.to_csv(par['peak2gene.csv'])
+
+def get_flank_bed(par):
+ rna = ad.read_h5ad(par['multiomics_rna'])
- scglue.genomics.Bed(atac.var).write_bed(f"{par['temp_dir']}/peaks.bed", ncols=3)
- scglue.genomics.write_links(
- gene2peak,
- scglue.genomics.Bed(rna.var).strand_specific_start_site(),
- scglue.genomics.Bed(atac.var),
- f"{par['temp_dir']}/gene2peak.links", keep_attrs=["score"]
+ scglue.data.get_gene_annotation(
+ rna, gtf=par['annotation_file'],
+ gtf_by="gene_name"
)
- print('this is the motif file: ', par['motif_file'])
- motif_bed = scglue.genomics.read_bed(par['motif_file'])
- # motif_bed = motif_bed.iloc[:100000, :] #TODO: remove this
- # tfs = pd.Index(motif_bed["name"]).intersection(rna.var_names)
+ rna = rna[:, ~rna.var.chrom.isna()]
- print("Generate TF cis-regulatory ranking bridged by ATAC peaks", flush=True)
- peak_bed = scglue.genomics.Bed(atac.var.loc[peaks])
- peak2tf = scglue.genomics.window_graph(peak_bed, motif_bed, 0, right_sorted=True)
- # peak2tf = peak2tf.edge_subgraph(e for e in peak2tf.edges if e[1] in tfs)
+ flank_bed = scglue.genomics.Bed(rna.var).strand_specific_start_site().expand(par['flank_length']/2, par['flank_length']/2)
+ flank_bed.to_csv(par['flank2gene'])
- flank_bed = scglue.genomics.Bed(rna.var.loc[genes]).strand_specific_start_site().expand(500, 500)
+def skeleton_promotor(par):
+ '''Creates promotor based skeleton using TF motif data and TSS flank'''
+ flank_bed = pd.read_csv(par['flank2gene'])
+
+ motif_bed = scglue.genomics.read_bed(par['motif_file'])
+
flank2tf = scglue.genomics.window_graph(flank_bed, motif_bed, 0, right_sorted=True)
-
sources = []
targets = []
- for e, attr in dict(gene2peak.edges).items():
- sources.append(e[0])
- targets.append(e[1])
+ for e, attr in dict(flank2tf.edges).items():
+ sources.append(e[1])
+ targets.append(e[0])
df = pd.DataFrame({'source': sources, 'target':targets})
- df.to_csv(par['gene2peak'])
+ df.to_csv(par['skeleton_promotor_file'])
+
+def skeleton_peak(par):
+ '''Creates peak based skeleton using TF motif data'''
+ atac = ad.read_h5ad(f"{par['temp_dir']}/atac-emb.h5ad")
+
+ print('this is the motif file: ', par['motif_file'])
+ motif_bed = scglue.genomics.read_bed(par['motif_file'])
+
+ print("Generate TF cis-regulatory ranking bridged by ATAC peaks", flush=True)
+ skeleton_peak = scglue.genomics.Bed(atac.var)
+ peak2tf = scglue.genomics.window_graph(peak_bed, motif_bed, 0, right_sorted=True)
+ peak2tf = peak2tf.edge_subgraph(e for e in peak2tf.edges if e[1] in tfs)
sources = []
targets = []
for e, attr in dict(peak2tf.edges).items():
- sources.append(e[0])
- targets.append(e[1])
- df = pd.DataFrame({'source': sources, 'target':targets})
- df.to_csv(par['peak2tf'])
+ sources.append(e[1])
+ targets.append(e[0])
+ peak2tf = pd.DataFrame({'source': sources, 'target':targets})
- sources = []
- targets = []
- for e, attr in dict(flank2tf.edges).items():
- sources.append(e[0])
- targets.append(e[1])
- df = pd.DataFrame({'source': sources, 'target':targets})
- df.to_csv(par['flank2tf'])
+ # merge peak2tf with peak2gene
+ peak2tf.columns = ['peak', 'source']
-def merge_connections(par):
-
- gene2peak = pd.read_csv(par['gene2peak'], index_col=0)
- gene2peak.columns = ['target', 'peak']
+ peak2gene = pd.read_csv('output/skeleton/peak2gene.csv', index_col=0)
+ peak2gene.columns = ['peak', 'target']
- peak2tf= pd.read_csv(par['peak2tf'], index_col=0)
- peak2tf.columns = ['peak', 'source']
+ tf2gene = peak2gene.merge(peak2tf, on='peak', how='inner')[['source','target']].drop_duplicates()
- flank2tf= pd.read_csv(par['flank2tf'], index_col=0)
- flank2tf.columns = ['target', 'source']
- # merge gene2peak and peak2tf
- tf2gene = gene2peak.merge(peak2tf, on='peak', how='inner')[['source','target']].drop_duplicates()
- # merge flank2tf and tf2gene
- tf2gene = pd.concat([tf2gene, flank2tf], axis=0).drop_duplicates()
- tf2gene.to_csv(f"{par['tf2gene']}")
+ tf2gene.to_csv(par['skeleton_peak_file'])
if __name__ == '__main__':
par = {
@@ -212,39 +133,72 @@ def merge_connections(par):
'multiomics_rna': f"resources/grn-benchmark/multiomics_rna.h5ad",
'annotation_file': f"output/db/gencode.v45.annotation.gtf.gz",
# 'motif_file': 'output/db/ENCODE-TF-ChIP-hg38.bed.gz',
- 'motif_file': 'output/db/jaspar_encode.bed.gz',
'temp_dir': 'output/skeleton',
'extend_range': 150000,
- 'tf2gene': 'output/skeleton/tf2gene.csv'
+ 'skeleton': 'output/skeleton/skeleton.csv'
}
print(par)
os.makedirs(par['temp_dir'], exist_ok=True)
par['rna-emb'] = f"{par['temp_dir']}/rna-emb.h5ad"
par['atac-emb'] = f"{par['temp_dir']}/atac-emb.h5ad"
par['guidance.graphml'] = f"{par['temp_dir']}/guidance.graphml.gz"
-
- par['gene2peak'] = f"{par['temp_dir']}/gene2peak.csv"
- par['peak2tf'] = f"{par['temp_dir']}/peak2tf.csv"
- par['flank2tf'] = f"{par['temp_dir']}/flank2tf.csv"
+ par['peak2gene'] = f"{par['temp_dir']}/peak2gene.csv"
+ par['flank2gene'] = f"{par['temp_dir']}/flank2gene.csv"
- # ---- simplify
+
+ # ----- connect rna to atac -> peak2gene connections
if False:
- multiomics_atac = ad.read_h5ad(par['multiomics_atac'])
- multiomics_atac = multiomics_atac[:, :10000]
-
- par['multiomics_atac'] = f"{par['temp_dir']}/multiomics_atac.h5ad"
- multiomics_atac.write(par['multiomics_atac'])
-
- # ----- actual runs
- # print('------- preprocess ---------')
- # preprocess(par)
- # print('------- training ---------')
- # training(par)
- print('------- peak_tf_gene_connections ---------')
- peak_tf_gene_connections(par)
- print('------- merge_connections ---------')
- merge_connections(par)
-
+ print('------- preprocess ---------')
+ preprocess(par)
+ print('------- get flank ---------')
+ get_flank_bed(par)
+ print('------- promotor based skeleton for different motif files ---------')
+ names = ['encode','jaspar']
+ motif_files = ['output/db/ENCODE-TF-ChIP-hg38.bed.gz', 'output/db/JASPAR2022-hg38.bed.gz']
+ if True:
+ for i, motif_file in enumerate(motif_files):
+ par['skeleton_promotor_file'] = f"{par['temp_dir']}/skeleton_{names[i]}_promotor.csv"
+ par['motif_file'] = motif_file
+ skeleton_promotor(par)
+ # - merge them
+ for i in range(len(names)):
+ df = pd.read_csv(f"{par['temp_dir']}/skeleton_{names[i]}_promotor.csv", index_col=0)
+ print(df.shape)
+ if i ==0 :
+ skeleton = df
+ else:
+ skeleton = pd.concat([df, skeleton], axis=0).drop_duplicates()
+ print(skeleton.shape)
+ skeleton.to_csv(f"{par['temp_dir']}/skeleton_promotor.csv")
+ print('------- peak based skeleton for different motif files ---------')
+ if False:
+ for i, motif_file in enumerate(motif_files):
+ par['skeleton_peak_file'] = f"{par['temp_dir']}/skeleton_peak_{names[i]}.csv"
+ par['motif_file'] = motif_file
+ skeleton_peak(par)
+ # - merge them
+ print('merging peak2tf from different motifs')
+ for i, name in enumerate(names):
+ df = pd.read_csv(f"{par['temp_dir']}/skeleton_peak_{names[i]}.csv")
+ print(df.source.nunique())
+ print(df.target.nunique())
+ if i ==0 :
+ skeleton_peak = df
+ else:
+ skeleton_peak = pd.concat([df, skeleton_peak], axis=0).drop_duplicates()
+ skeleton_peak.to_csv(f"{par['temp_dir']}/skeleton_peak.csv")
+
+    print('------- merge peak based skeleton with promotor based skeleton ---------')
+ # - read peak based and promotor based skeletons
+ skeleton_peak = pd.read_csv(f"{par['temp_dir']}/skeleton_peak.csv")[['source', 'target']].drop_duplicates()
+ print(len(skeleton_peak), skeleton_peak.source.nunique(), skeleton_peak.target.nunique())
+ skeleton_promotor = pd.read_csv(f"{par['temp_dir']}/skeleton_promotor.csv")[['source', 'target']].drop_duplicates()
+ print(len(skeleton_promotor), skeleton_promotor.source.nunique(), skeleton_promotor.target.nunique())
+
+ # - merge and save
+ skeleton = pd.concat([skeleton_promotor, skeleton_peak], axis=0).drop_duplicates()
+ print(len(skeleton), skeleton.source.nunique(), skeleton.target.nunique())
+ skeleton.to_csv(f"{par['temp_dir']}/skeleton.csv")
diff --git a/src/robustness_analysis/script_all.py b/src/robustness_analysis/script_all.py
index f7431d266..aeecb75eb 100644
--- a/src/robustness_analysis/script_all.py
+++ b/src/robustness_analysis/script_all.py
@@ -9,23 +9,26 @@
'reg_type': 'ridge',
'read_dir': "resources/grn_models/",
'write_dir': "resources/results/robustness_analysis",
- 'degrees': [0, 10, 20, 50, 100],
+ # 'degrees': [0, 10, 20, 50, 100],
+ 'degrees': [50],
'noise_types': ["net", "sign"],
- 'methods': [ 'collectri', 'negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor', 'grnboost2', 'scenic', 'granie', 'scglue', 'celloracle'],
-
+ # 'noise_types': ['weight'],
+ 'methods': [ 'collectri', 'negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor', 'grnboost2', 'scenic', 'granie', 'scglue', 'celloracle', 'figr', 'scenicplus'],
"perturbation_data": "resources/grn-benchmark/perturbation_data.h5ad",
"tf_all": "resources/prior/tf_all.csv",
"max_n_links": 50000,
- "apply_tf": "true",
- 'subsample': -2,
- 'verbose': 3,
- 'binarize': True,
+ "apply_tf": True,
+ 'binarize': False,
+ 'subsample': -1,
+ 'verbose': 0,
'num_workers': 20,
'consensus': 'resources/prior/consensus-num-regulators.json',
'static_only': True,
'clip_scores': True,
- 'layer': 'scgen_pearson',
+ 'layer': 'pearson',
+ 'apply_skeleton': True,
+ 'skeleton': 'resources/prior/skeleton.csv'
}
meta = {
@@ -38,7 +41,6 @@
os.makedirs(par['write_dir'], exist_ok=True)
os.makedirs(f"{par['write_dir']}/tmp/", exist_ok=True)
-
def run_reg(par):
from metrics.regression_1.main import main
reg1 = main(par)