diff --git a/runs.ipynb b/runs.ipynb index bc15760aa..fbff86287 100644 --- a/runs.ipynb +++ b/runs.ipynb @@ -81,9 +81,22 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'lightgbm'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 20\u001b[0m\n\u001b[1;32m 17\u001b[0m warnings\u001b[38;5;241m.\u001b[39msimplefilter(action\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m'\u001b[39m, category\u001b[38;5;241m=\u001b[39m\u001b[38;5;167;01mFutureWarning\u001b[39;00m)\n\u001b[1;32m 19\u001b[0m sys\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m../\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m---> 20\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mgrn_benchmark\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msrc\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhelper\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m surragate_names\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msrc\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhelper\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[1;32m 22\u001b[0m par \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 23\u001b[0m \u001b[38;5;66;03m# 'methods': [ 'collectri', 'negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor', 'genie3', 'grnboost2', 'scenic', 'scglue', 'celloracle'],\u001b[39;00m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmethods\u001b[39m\u001b[38;5;124m'\u001b[39m: [ \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcollectri\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnegative_control\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpositive_control\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpearson_corr\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mportia\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mppcor\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgrnboost2\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mscenic\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mscglue\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcelloracle\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mscenicplus\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 25\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodels_dir\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mresources/grn_models/\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 26\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mscores_dir\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mresources/scores\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 27\u001b[0m }\n", + "File \u001b[0;32m~/projs/ongoing/task_grn_inference/../grn_benchmark/src/helper.py:6\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mlightgbm\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mlgb\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodel_selection\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m cross_validate\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'lightgbm'" + ] + } + ], "source": [ "%reload_ext autoreload\n", "%autoreload 2\n", @@ -104,7 +117,7 @@ "warnings.simplefilter(action='ignore', category=FutureWarning)\n", "\n", "sys.path.append('../')\n", - "from grn_benchmark.src.commons import surragate_names\n", + "from grn_benchmark.src.helper import surragate_names\n", "from src.helper import *\n", "par = {\n", " # 'methods': [ 'collectri', 'negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor', 'genie3', 'grnboost2', 'scenic', 'scglue', 'celloracle'],\n", @@ -128,14 +141,7 @@ "outputs": [], "source": [ "if False: \n", - " create_skeleton() # create tf2gene putative links\n", - "if False: # check how predictions are included in the skeleton \n", - " all_links = 'path_2_skeleton'\n", - " par['models_dir'] = 'resources/grn_models/d0_hvg'\n", - " for method in ['scenicplus']:\n", - " prediction = pd.read_csv(f\"{par['models_dir']}/{method}.csv\", index_col=0)\n", - " prediction['link'] = prediction['source'].astype(str) + '_' + prediction['target'].astype(str)\n", - " print(method, len(prediction), np.intersect1d(all_links, prediction['link']).shape)" + " create_skeleton() # create tf2gene putative links\n" ] }, { @@ -147,34 +153,42 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "sbatch: error: Unable to open file scripts/sbatch/ppcor.sh\n" + "scenicplus\n", + "Job scenicplus submitted successfully.\n", + "Submitted batch job 7765370\n", + "\n" ] } ], "source": [ - "# # !sacct \n", - "# " + "if True: # local runs\n", + " run_grn_inference()\n", + "if False: # r based methods\n", + " !sbatch scripts/sbatch/ppcor.sh" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 62, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "han.h5ad jackson.h5ad\tshalek.h5ad\n" + ] + } + ], "source": [ - "if False: # local runs\n", - " run_grn_inference()\n", - "if False: # r based methods\n", - " !sbatch scripts/sbatch/ppcor.sh\n", - "if False: # seqera (celloracle) #TODO: add this to local \n", - " run_grn_inference_seqera()" + "!ls resources/grn-benchmark/mccalla/inference" ] }, { @@ -186,14 +200,14 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Submitted batch job 7759699\n" + "Submitted batch job 7761215\n" ] } ], @@ -207,532 +221,491 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 2, "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
S1S2static-theta-0.0static-theta-0.5rank
scenicplus0.2450330.4034940.7605830.5392094
collectri-0.100238-0.2111820.4855060.45725911
negative_control-0.039305-0.0410040.2746590.44038312
positive_control0.1971290.5788220.8720030.5954892
pearson_corr0.2693790.5092970.7351560.5170563
portia0.1489410.2272480.4736070.4676079
ppcor0.0228460.0941070.4307760.44914410
grnboost20.3810320.4598600.7481750.6157901
scenic0.1446960.2065710.6850340.5564857
scglue0.0783090.2388590.5305310.4834238
celloracle0.2168970.3114510.7115490.5641606
scenicplus0.2450330.4034940.7605830.5392094
\n", - "
" - ], - "text/plain": [ - " S1 S2 static-theta-0.0 static-theta-0.5 rank\n", - "scenicplus 0.245033 0.403494 0.760583 0.539209 4\n", - "collectri -0.100238 -0.211182 0.485506 0.457259 11\n", - "negative_control -0.039305 -0.041004 0.274659 0.440383 12\n", - "positive_control 0.197129 0.578822 0.872003 0.595489 2\n", - "pearson_corr 0.269379 0.509297 0.735156 0.517056 3\n", - "portia 0.148941 0.227248 0.473607 0.467607 9\n", - "ppcor 0.022846 0.094107 0.430776 0.449144 10\n", - "grnboost2 0.381032 0.459860 0.748175 0.615790 1\n", - "scenic 0.144696 0.206571 0.685034 0.556485 7\n", - "scglue 0.078309 0.238859 0.530531 0.483423 8\n", - "celloracle 0.216897 0.311451 0.711549 0.564160 6\n", - "scenicplus 0.245033 0.403494 0.760583 0.539209 4" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "10000-skeleton_False-binarize_False_lognorm-ridge.csv\n", + "10000-skeleton_False-binarize_False_pearson-ridge.csv\n", + "10000-skeleton_False-binarize_True_lognorm-ridge.csv\n", + "10000-skeleton_False-binarize_True_pearson-ridge.csv\n", + "10000-skeleton_True-binarize_False_lognorm-ridge.csv\n", + "10000-skeleton_True-binarize_False_pearson-ridge.csv\n", + "10000-skeleton_True-binarize_True_lognorm-ridge.csv\n", + "10000-skeleton_True-binarize_True_pearson-ridge.csv\n", + "50000-skeleton_False-binarize_False_lognorm-ridge.csv\n", + "50000-skeleton_False-binarize_False_pearson-ridge.csv\n", + "50000-skeleton_False-binarize_True_lognorm-ridge.csv\n", + "50000-skeleton_False-binarize_True_pearson-ridge.csv\n", + "50000-skeleton_True-binarize_False_lognorm-ridge.csv\n", + "50000-skeleton_True-binarize_False_pearson-ridge.csv\n", + "50000-skeleton_True-binarize_True_lognorm-ridge.csv\n", + "50000-skeleton_True-binarize_True_pearson-ridge.csv\n" + ] } ], "source": [ - "df_scores = pd.read_csv(f\"resources/scores/hvg/skeleton_False/scgen_pearson-ridge.csv\", index_col=0)\n", - "df_all_n = (df_scores-df_scores.min(axis=0))/(df_scores.max(axis=0)-df_scores.min(axis=0))\n", - "df_scores['rank'] = df_all_n.mean(axis=1).rank(ascending=False).astype(int)\n", - "df_scores" + "!ls resources/scores/" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 S1S2static-theta-0.0static-theta-0.5rankS1S2static-theta-0.0static-theta-0.5static-theta-1.0rank
collectri-0.100238-0.2111820.4855060.45725911
negative_control-0.044574-0.0451580.3598050.43845110
positive_control0.1971290.5788220.8720030.5954892
pearson_corr0.2734430.5163430.7829780.5382523
portia0.2633100.3570060.5663650.5075706
ppcor0.0179540.1597540.4680490.4549959
grnboost20.4219360.4893220.7889310.6294711
scenic0.1680060.2189160.7569650.5654345
granie0.0832980.1060120.1941640.36342512
scglue0.0808570.2936300.6603570.4807347
celloracle0.2091510.2914780.6900990.5763434
figr0.1136450.1931310.4280320.4652688collectri-0.052885-0.1622820.2419390.3116530.27939113
negative_control-0.038053-0.0534550.2048350.2986160.27932012
positive_control0.2854820.4973540.5582470.4370840.2914423
pearson_corr0.2279000.4181760.5581760.4337770.2904454
portia0.1143650.2476590.4637100.3426780.2817187
ppcor-0.009030-0.0267820.3283780.3156340.27893710
grnboost20.2773840.3880480.5830350.5012600.3055202
scenic0.1324730.1976080.5309810.4487230.3088126
granie0.0655660.0883140.1666340.2541590.26877911
scglue0.0543290.2533960.4771330.3344580.2821638
celloracle0.1760590.2405990.5783660.4685570.3009735
figr0.0970690.1603780.3084340.3530000.2852119
scenicplus0.2755610.3519450.6314250.5152920.3157561
\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 8, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df_scores = pd.read_csv(f\"resources/scores/full/skeleton_True/scgen_pearson-ridge.csv\", index_col=0)\n", + "df_scores = pd.read_csv(f\"resources/scores/50000-skeleton_True-binarize_True_pearson-ridge.csv\", index_col=0)\n", "df_all_n = (df_scores-df_scores.min(axis=0))/(df_scores.max(axis=0)-df_scores.min(axis=0))\n", "df_scores['rank'] = df_all_n.mean(axis=1).rank(ascending=False).astype(int)\n", "df_scores.style.background_gradient()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Format resourcs used" + ] + }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 19, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(
,\n", - " )" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAFzCAYAAADSc9khAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA6ZklEQVR4nO3de1xUdf4/8NfMMAwXB7yhiKJG4iVNNMVLiiAKWLRruGtR8Q0v3+2yrVlmLi75S8q+Ylmbrd3rG+qXStuyMjcRCc2yFKHybnnLK4gIDnIZ5nJ+fwwzOAJ6DgJnzszr+Xj4yDnzOWfefpxeHj7ncz5HJQiCACIi8ghquQsgIqL2w9AnIvIgDH0iIg/C0Cci8iAMfSIiD8LQJyLyIAx9IiIPwtAnIvIgXnIX0N6sVivOnj0LvV4PlUoldzlERDdMEARUVlYiJCQEavW1z+U9LvTPnj2L0NBQucsgImp1p06dQq9eva7ZxuNCX6/XA7B1TkBAgOj9TCYTNm/ejPj4eGi12rYqTzHYHw3YFw3YF87aqz8MBgNCQ0Md+XYtHhf69iGdgIAAyaHv5+eHgIAAfpnB/rgS+6IB+8JZe/eHmCFrXsglIvIgDH0iIg/C0Cci8iAMfSIiD8LQJyLyIAx9IiIPInvo19XVIS0tDV5eXjhx4sR123/33XcYM2YMoqOjMWbMGGzfvr3tiyQichOyztM/ceIE7rvvPvTv3x8Wi+W67X///XckJibiq6++QlRUFLZt24a77roLe/bsQZ8+fdqhYiIiZZP1TP/y5ctYs2YNZs6cKar9ihUrcMsttyAqKgoAEB0djQEDBuC1115ryzKJiNyGrGf6Q4YMAQCcPn1aVPu8vDxMmDDBaVtkZCS2bNnS6rURNUUQBFgFwCoIEARAgACTyYI6C1BTZ4FJUEGwvw9AEADUt7O1tx2jzmKF2dJwDNS3Fa74HMf+ti2O31+53XHcq1431NuwvaLaBG8vced5Vx7jmu3g3NBituC3Syr8eOwiNF6aqxuLPKZ4La2zdY55fRazGQcrVOjw2wVoNJrrts8/dB5jwrpg4sBu8NFev31LKGoZhmPHjmH69OlO24KDg3H8+PFm9zEajTAajY7XBoMBgO32aJPJJPqz7W2l7OPO2rM/BEGAodaMc5dqUVppRHWdBRarAJNVgMVqxYkL1ejk7w2LVXD8MpqtOHahCsEBOlisAsxWARcu1+FiVR06+3ujus6Cn09V4OZu/rBabSFttQqw1If6ZaMZJQYjunbwduxvsQqormtuGNILT+/Ka/O+UAYNVh7YLXcRLkSDtw4WiW696offsWNBNIL0OtH7SPn/UFGhX11dDZ3OuSN0Oh2qq6ub3Wfp0qXIyMhotH3z5s3w8/OTXENubq7kfdyZmP6wCkCtBag0ASYrUGFUocYCnLysgo8GsAjAqSpArwWOV6rQWQccMdjWENGoBFiEtlsCe89pwzXfv3C5rs0+GwC81QJUAFD/R1Q1/Nbx+sr3mtsuZp86K1BtVqG7r7hT2bboddHHlPDhYpu2yZ+nDQ56UwcBu77Nk3Tsa2Xg1RQV+n5+fk5n7YDtTP5a4b1w4ULMmzfP8dq+Gl18fLzkBddyc3MRFxfHhaTQ0B+TJ0/G8YtG5B8uRUWNCWcralFVZ8a3v5Uh0NcLNSYr6sxWSce+eMVf8dWB36ujD8xWAb07+8FLo4KXWgWVSoXfy6oxrFcg1GrbNrVaBUGwDWn069YBmvrtJosVGrUawQE6aNQqmK0CunbwhkZlO45aBWjUKqhUth/zfbUa+Hpr4KVWQVP/y8dLDS+N2ha2KsBsNiM/Px+xsRPhrdVCrVLVB6/K0cb23yteu+mzHPj/ibP26g/7CIYYigr9sLAwlJSUOG0rLi5GWFhYs/vodLpGPx0AgFarbdFfQkv3UzKTxYrN+0vwa0kljl+owv6zl2C1Cjhe5gX8sLXZ/S7VmJvc3k2vw2WjGQOC9SivqkPcLd2h1ahx4bIRQ3oGwmiy4qau/vD2UqNXJ18E+Grh7aWGXuflkmFpMpngowE6+vt63HejOZ74/8m1tHV/SDm2okJ/0qRJ+PHHH5227d69G5MnT5apIvciCAL2nzXgaOllfPfbBWz9tRTBAT7Ye+aSqP0jegViQLAeg3oEoHuAD0I6+qKLvze8vdTo6KeF7uoLe0TU7lw69O+//35oNBqsWbMGADB37ly8//77+P777zFu3Dhs374dhw4dwrp162SuVJmOnK/Em1uP4WjpZfx8qqLJNqWVzsNpiUN74LbenQDBijO/7ceUmLG4tVdn+Hoz0ImUQNbQr6urQ3x8PCoqKgAAycnJCA0NxSeffAIAqK2tdXreY58+ffDVV1/hqaeegre3N4xGI7766ivemCVR4e/lyPz6IApOlDf5foCPF8b16wqrICAsqAMmD+qOW3sGOk33M5lM+M/FfRge2hHaNppaRkStT9bQ9/b2xtatW5t9/7PPPmu0LSoqqtEQD11fTZ0FK/J+w1vbjjZ6r2dHX8xP6I9BPQIQ1rWD6LncRKQ8Lj28Qzfuu98uIOX9nY7ZKHaBvlo8MTkcM8fdJF9xRNTuGPpuquhkOV7cdAg/HrsIoCHw77w1GH8e0QuxA7vLWB0RyYWh72Yqquvw90/3IGe/89TWGbf3xTOJg+Cl4dANkSdj6LuRA2cNuPO1hqWme3XyxYrkYRjRp7OMVRGRK2Hou4kj5yvxl9UN652kjOmN56cOccmbmYhIPgx9N5B7oMQR+IG+Wnz66Fj066aXuSoickUMfQUrvlSLv6ze7bhjtmdHX7z9XyMY+ETULIa+Qn2y+xSe/vcex+v+3Tvgy7+Nb7M1uInIPTD0FWjVjhN49sv9jtf/N3s0xod3lbEiIlIKhr7C/O93x/HcVwcAALf0CMDah8dA78PVDIlIHIa+gqwtOOkI/DtvDcZrycM5756IJGHoK4AgCFj42V58XHAKAHDX0B5YkTwcGjWnYxKRNAx9F2e1Ckj813c4eM72ZJzbb+6C5dMjGPhE1CIMfRdWa7LgT2/ucAT+zHF98ewfBstcFREpGUPfhaW8txP7z9oCf+m0W3HfqN4yV0RESsfQd1HrfzqN3b/bHnLyVsptmDKkh8wVEZE74NQPF1RlNCPt070AgHtHhjLwiajVMPRd0IubDsFotsLPW4NFf7hF7nKIyI0w9F1M/uHzWPXD7wCAF5KGoIOOI3BE1HoY+i7kbEUNHv/oJwDAlMHBSBreS+aKiMjdMPRdhCAIeHhNISprzQjv1gHL74mQuyQickMMfRfxxc9nHUskv/jnoRzWIaI2wdB3AecNtUj7zLZM8oNj+2B4704yV0RE7oqh7wLe2HoUtSYrgvQ6/OPOQXKXQ0RujKEvs8tGM9btti2kNi+uPx+CQkRtiqEvs1U7TqC6zoJenXxx78hQucshIjfH0JdR2WUjVn5zBADw3+NvgporZxJRG2Poy+id7cdQY7Kge4AOD47tK3c5ROQBGPoyOVNRg/e2HwcAzI8fwLN8ImoXDH2ZrC04BYtVwLDQjpjOsXwiaicMfRlUGc14f/sxAEDq7X1kroaIPAlDXwbfHbmAqjoLenb0xR8jespdDhF5EIa+DNbVP+A8Krwrn3VLRO2Kod/ODhUbkHfoPAAgZQyHdoiofTH029mnhacBALEDu2FIz0CZqyEiT8PQb0cWq4BP6kP/T7dxrXwian8M/XaUf+g8KqpN8PfWYNKgbnKXQ0QeiKHfjnIPlAAA/jisJxdWIyJZMPTbiSAI2HLQFvoTBwTJXA0ReSqGfjvZe+YSyqrq4K1RIyqcoU9E8mDot5MNv5wFAEQPCIKvN4d2iEgeDP12YLEK+LI+9O8a2kPmaojIkzH028EvpytQYjDCR6vG5EHd5S6HiDwYQ78d5OwrBgBMHtQd/jovmashIk/mEqG/fv16REZGIioqCtHR0di/f3+zbY1GI5588klEREQgOjoao0ePxvr169uxWul+OlUBAJjQnxdwiUhesp927tq1C6mpqSgsLER4eDhWr16NhIQEHDx4EHq9vlH7JUuW4PPPP8fPP/+MwMBA/PTTTxgzZgx27dqFiIgIGf4E11ZaacTPJysAALf17iRvMUTk8WQ/08/MzERiYiLCw8MBACkpKTCbzcjKymqy/c8//4zIyEgEBtrWrRk+fDgCAwPxzTfftFfJkmzadw51Fitu7RmIm4P85S6HiDyc7KGfl5eHkSNHOl6r1WqMGDECW7ZsabL9n/70J2zfvh0nT54EAOTk5KC0tBTdu7vmBVL70E5UeFeoVFxGmYjkJevwTllZGQwGQ6PADg4ORkFBQZP7zJgxA9XV1Rg6dCh69OiBX3/9FX/+859xzz33NNneaDTCaDQ6XhsMBgCAyWSCyWQSXau9rZR9AGDHkQsAgBG9AyXv68pa2h/uiH3RgH3hrL36Q8rxZQ396upqAIBOp3PartPpHO9d7b333kNmZiYKCwtx880345dffsGWLVugVjf9Q8vSpUuRkZHRaPvmzZvh5+cnuebc3FzRbS8agWKDF1QQcOHgLvznN8kf5/Kk9Ie7Y180YF84a+v+aC4vmyJr6NtD98ozcfvrpgJZEAQsWLAATz31FG6++WYAQEREBObNm4eamho888wzjfZZuHAh5s2b53htMBgQGhqK+Ph4BAQEiK7VZDIhNzcXcXFx0Gq1ovbZefwiULQbfbv4Y9ofx4v+LCVoSX+4K/ZFA/aFs/bqD/sIhhiyhn6XLl0QGBiIkpISp+3FxcUICwtr1L60tBTl5eXo27ev0/abbroJn376aZOhr9PpGv0kAQBarbZFfwlS9hNUtp8+dFqN2/4P0NJ+dEfsiwbsC2dt3R9Sji37hdzY2FgUFhY6XguCgKKiIkyePLlR265du0Kn0+HcuXNO28+dO9eioZq2ZrYIAACtRvZuJiIC4AKhn5aWho0bN+LIkSMAgOzsbGg0GqSmpgIAxo8fj/T0dAC2mT2pqal47733UF5eDgAoKipCbm5usxdy5WSyWAEAWg1n7RCRa5D95qxRo0YhKysLycnJ8PX1hVqtRk5OjuPGrOrqaqcx/3/+859YvHgxJk2aBD8/P1RWViIzMxOPP/64XH+EZpmttjN9L57pE5GLkD30ASApKQlJSUlNvldUVOT02s/PDy+++GJ7lHXDLlbVAQCfkkVELoOnoG1o94mLAIBbeoifJURE1JYY+m3owDnbNKrIvlxzh4hcA0O/jVTXmXHk/GUAwC0hPNMnItfA0G8je09fglUAuul1CA7wkbscIiIADP02c7ikEgAwqEcAF1ojIpfB0G8jh4ttoT+wR+NnAhARyYWh30aOX6gCAIR15Rr6ROQ6GPptwGIVsOf0JQDA4JBAmashImogOfSzs7Pbog63cqKsCpeNZui81BgYzOEdInIdku/IXbBgAUwmE+655x6XXOTMFfxaP57fr1sHLsFARC5FciL16NEDFRUViIuLw6xZs7B9+/a2qEvRDtaH/hAO7RCRi5Ec+mvWrMETTzyB77//HnPmzMEnn3yCMWPGYOnSpThz5kxb1Kg4J8tsF3H7dOVPQkTkWiSH/qBBgxy/Hz58OO6//34MGDAA6enpGDx4MO688058+umnEAShVQtVkqOlttDv24Uzd4jItUgO/cTERJw/fx7Lly/H4MGDMX78eJw+fRqrVq3CuXPn8O6772Lv3r24//7726Jel1dntjrm6HN4h4hcjeQLuVu3bkVoaChCQkKQmpqKGTNmOD2+sGfPnli8eDGGDx/emnUqxpHzl1FnsULv44XQzr5yl0NE5ERy6Hfr1g3vv/8+YmNjm22zdOlSj53Zc7TUtshav24duPwCEbkcycM76enpjQK/rq4Ob775JsrKygAACxcuxPfff986FSqMfTllzs8nIlckOfQ//PDDRttUKhUqKysxffr0VilKyQ45Qp/LKROR62mVO4e0Wi0WLFiAqqqq1jicYgmCgF/ql1+4tRcv4hKR6xE1pr9ixQqsWLECAFBcXIywsLBGbS5duoSRI0e2bnUKU2k0O56Ly+EdInJFokI/JiYGHTt2hCAIWLZsGdLS0pzeV6vVCAoKuubFXU9QfKkWABDg4wU/b5d45jwRkRNRyRQREYGIiAgAgE6nw3333demRSnV6fJqAEDPTp45c4mIXJ/kMf1rBf5DDz10Q8Uo3YkLttDv05mhT0SuSdSZ/ueff47OnTtjwoQJmDVrVrPtNm3a1GqFKdH5SiMAoEdHPhOXiFyTqDP9559/Hm+99RYA4Ouvv4YgCE3+8nTl9RdxO/t5y1wJEVHTRJ3pFxYWOn4fHx+PDz74oMl2qamprVOVQpVetp3pd+mgk7kSIqKmSR7TX7VqVYve8wT2C7khHN4hIhclOfQLCgrw3HPP4dixYwCAN954AxEREZg+fTpKSkpavUClEAQBZytsUzZ7cfYOEbkoyaGfkZEBq9WKTp064aeffsKcOXOQkJCAXr164W9/+1tb1KgINSYLLhvNAIDgQJ7pE5FrknwHUVVVFRYvXgwAePbZZzFx4kS8+OKLAIDx48e3anFKct5gG8/31Wrg762RuRoioqZJPtM3mUwAAKPRiE8++QSzZ892vOft7bmzVooNtqGd4EAfLqlMRC5L8pl+jx49MGPGDBQXFwMApk2bBkEQkJOTA6PR2OoFKsW5SzUAgOAADu0QkeuSfKb/9ttvw9/fHz4+Pli/fj10Oh2++OILLFu2DI899lhb1KgIZ8ptod+zE5+WRUSuS/KZfufOnfH66687bbv77rtx9913Ox6i4olK6sf0eaZPRK6sVdbTt/Pkh6iU1I/pdwvgjVlE5Lokh/7PP/+MmJgYdOrUCRqNxunXtm3b2qJGRThTYRveCQnk8A4RuS7JwzupqamYPHkynnrqKej1esdMFUEQ8OSTT7Z6gUpxrn4t/ZCODH0icl2SQ1+v1+Pll19u8j3707U8jdlidTwxi8M7ROTKJA/vDB06FBcuXGjyvaKiohsuSIkuVtsCX60COnGFTSJyYS060x89ejRiY2MREhICjabh7tOsrCw88cQTrVmfIpRdtoV+Rz9vaNS8MYuIXJfk0H/nnXcwbNgwHDlyBEeOHHF6r6KiorXqUhT7s3E5XZOIXJ3k0B8/fjw2bNjQ5Hue+uxc+8ydHlxojYhcnOQx/eYCHwA++uijGypGqYo5c4eIFKJFN2ft3LkTqampuPfeewEAb731Fufog0sqE5Hrkxz6n3/+OSZPnozy8nIcPHgQADBw4EAsXLgQH3/8cYuKWL9+PSIjIxEVFYXo6Gjs37//mu2PHTuGP/3pT5g4cSIGDx6MMWPGYPfu3S367NZwtj70e3HdHSJycZJD/+WXX8Yvv/yCL7/8El26dAEAxMTEIDc3F2+88YbkAnbt2oXU1FR8+OGH2L59O2bPno2EhARUVlY22b60tBSTJk3C3LlzkZ+fj19++QV+fn6NLiq3J8eyyryQS0QuTnLoazQahIWFAYDTuvH+/v6wWq2SC8jMzERiYiLCw8MBACkpKTCbzcjKymqy/bJlyzB27FhMmDABAODl5YV33nnH8VoOFypti60F6XljFhG5NsmhX1lZiXPnzjXavnfv3mbPzq8lLy8PI0eObChIrcaIESOwZcuWJtt/9tlnjQK+X79+CAkJkfzZraGy1oSqOgsAoBvP9InIxUmesjl37lxEREQgOTkZp06dQkZGBg4fPowvv/wS77zzjqRjlZWVwWAwoHv37k7bg4ODUVBQ0Kh9VVUVjh8/DovFggceeAAnTpxAhw4d8MQTT+COO+5o8jOMRqPTw10MBgMA2xPA7E8BE8Pe9up9zpVXAQD8dRro1IKkYypZc/3hidgXDdgXztqrP6QcX3LoP/jgg+jevTuWLl2K8vJy/Otf/8KQIUOwfv16xMXFSTpWdXU1AECncx4W0el0jveuZL/5a9GiRcjPz0dERATy8vKQkJCAr7/+usnPX7p0KTIyMhpt37x5M/z8/CTVCwC5ublOr48aAMALvjDjP//5j+TjKd3V/eHJ2BcN2BfO2ro/msrL5kgOfQBISEhAQkJCS3Z1Yg/dqx+zaDQamwxk+5IPf/jDHxAREQEAmDRpEmJjY7FixYomQ3/hwoWYN2+e47XBYEBoaCji4+MREBAgulaTyYTc3FzExcVBq9U6tm8+UALs/wW9unXEnXeOFn08pWuuPzwR+6IB+8JZe/WHfQRDDMmhX1hYiIKCAlRUVKBz584YNWoUhg0bJvUwAIAuXbogMDAQJSUlTtuLi4sdF4uvFBQUBJ1Oh549ezpt79OnD3bs2NHkZ+h0ukY/SQCAVqtt0V/C1ftVGm0Xr7v46zzyS97SfnRH7IsG7Atnbd0fUo4tOvSPHj2KlJQU7Nq1C4IgOLarVCqMHTsW2dnZ6NOnj7RKAcTGxqKwsNDxWhAEFBUVIT09vVFbjUaDcePGNbqQXFJSgt69e0v+7NZQVr+kcid/rq5JRK5P1OydsrIyTJw4EZ07d8amTZtQVlYGk8mECxcuYOPGjdDr9YiOjkZ5ebnkAtLS0rBx40bHPPvs7GxoNBqkpqYCsK31c+U/AH//+9/xxRdf4OTJkwCAAwcOYPPmzbI9lP3CZdvQVNcOnK5JRK5P1Jn+yy+/jMTERLz55ptO2zt37owpU6ZgypQpeOSRR7B8+XK88MILkgoYNWoUsrKykJycDF9fX6jVauTk5ECv1wOwXaC4csw/Pj4er732GqZOnYoOHTrAbDZj1apVuOuuuyR9bmuxL6vctQPP9InI9YkK/dzcXOTn51+zzYsvvohJkyZJDn0ASEpKQlJSUpPvNfVglpSUFKSkpEj+nLZQVmX7B6kLQ5+IFEDU8I6vry86dOhwzTYBAQHw9fW8tWfsZ/pd/Dm8Q0SuT1Toe3mJu94rtp07Ka9/VCIfk0hESiAqpQ8ePIhZs2Zdt92hQ4duuCAlEQQBFdW2O+E6+nF6GhG5PlGhX1tbi+PHj4tq50mq6ywwmm3z9DtzyiYRKYCo0B82bNh1L+QCwMSJE2+4ICWxD+14a9Tw89ZcpzURkfxEjelv3rxZ1MHEtnMX5VW2oZ1O/lqnZaaJiFyVqNAXe4uvp912fZEXcYlIYVr0jFyyqay1nekH+HrWP3ZEpFwM/RtQbl93hzN3iEghGPo34GL9mD5n7hCRUrQ49E0mk2PRs5Y8G9cdOJZg4N24RKQQkkPfaDTikUcegb+/v2OK5qxZszB79mzU1NS0eoGuzLEEA9fdISKFkBz6aWlpOHPmDD7++GN069YNAPDee+9h0KBBTk+o8gSllVxWmYiURXLo7969G1988QWmTZvmWGDNy8sL8+fP97hlGOxr6QfpGfpEpAySQ99isUCttu125RO0AODixYutU5VC2Ofpd+GFXCJSCMmhHxgYiHfffRcAHHehVlVV4Zlnnmn07Fp3JggCLtXYZu8Ecp4+ESmE5LWQV6xYgSlTpuDpp5+GxWLBTTfdhHPnzqFXr17IyclpixpdUnWdBfYfdDr4eN6S0kSkTJLTqn///jh06BCys7Oxf/9+AMCQIUNw//33w9vbc4Y57Gf5Wo0KvloutkZEyiA59N9++208/PDDmDlzZlvUoxj2dfQDfbnYGhEph+TQT09PR0VFBVJSUjxqDP9qBq67Q0QKJPlCbnh4OEJDQ/HQQw9hypQpWL16Naqrq9uiNpdmH94J8GHoE5FySD7TX7duHUJDQ3H//fejuLgY2dnZiIuLw80334zU1FRMmjSpLep0OfbQ52MSiUhJJJ/p9+rVy/H74OBgjB07Frfeeiv+/e9/4+67727N2lyagWf6RKRAkkN/0qRJOHr0KBYvXox+/fohOjoaR48exZtvvolz5861RY0u6bLRDIDTNYlIWSQn1vfff4/+/ftj0KBB+Mtf/uKxF3Qv19aHvo6hT0TKITmx+vTpg7Vr12L48OFtUY9iVNVZAAD+3gx9IlIOycM7q1evbjbwP/vssxsuSClq6mxn+v463phFRMoh6jS1rq4OWq3tJqS6ujp8++23TbZbsmQJpk2b1qoFuioDh3eISIFEJVa/fv0wcOBAbN68GTExMc2286Q7U8vrV9js6Oc5S08QkfKJCv3PPvsMer0eABAdHY38/Pwm29mfpOUJKuvP9LnCJhEpiajQHzlypOP3b7zxRpNtqqqqmn3PHVXWL8PA4R0iUhLJF3Jfe+21RtuqqqowevRoZGdnt0pRSlBltM3e4Tx9IlISyYl1+PDhRtv8/f2xb98+REVFtUpRrk4QBFRx9g4RKZCo0N+2bRu2bdsGADhx4gSee+65Rm3Ky8tRVlbWutW5qEqj2fEAFb2OY/pEpByiQv/EiROOi7fl5eWNLuSq1WoEBQU5HqPo7uwXcbUaFXy0kkfIiIhkIyr0U1NTkZqa6vj9qlWr2rQoV2dfgkHvwweoEJGySD5NvVbg5+Xl3VAxSmGfuaPnRVwiUpgWpZbVasXRo0dRXFwMwT64DeDpp59GUVFRqxXnqiqNvBuXiJRJcmodPHgQSUlJ+PXXX6FSqZxC31OGOhqGdxj6RKQskod3nnjiCSxatAg1NTWYMGECrFYramtrkZ2djWeffbYtanQ5VfVn+lxhk4iURnLoG41GPPDAA9DpdI5t3t7euO+++/DTTz+1anGuqsZkuzHL15tz9IlIWSSHvslkcvzeYrE45ubX1NRg3759rVeZC6uuX0vfV8vQJyJlkRz6PXv2RHJyMioqKjBx4kSMHj0af/nLXxAZGYkBAwa0RY0up6Y+9P14pk9ECiN5UPqll17Cvn37oNVqsXDhQly4cAHbt2/HkCFD8Morr7RFjS6nYXiHY/pEpCySz/T79OmDxMRE+Pv7w8fHB6+//jr27NmDjz/+GCEhIZILWL9+PSIjIxEVFYXo6Gjs379f1H4rV66ESqXC1q1bJX/mjeLwDhEpVauuIZCUlCSp/a5du5CamooPP/wQ27dvx+zZs5GQkIDKyspr7nf27Fm89NJLN1LqDbHP3uEKm0SkNKJSKzY2VtTBfv75Z0kfnpmZicTERISHhwMAUlJSsGDBAmRlZWHOnDnN7jdnzhz84x//wCOPPCLp81pLw5RNnukTkbKICv3jx49jxowZ12134sQJSR+el5eH//f//p/jtVqtxogRI7Bly5ZmQ3/Dhg3QarVISEiQ9FmtyTG8w9AnIoURFfr33XefqBuvjEaj6A8uKyuDwWBA9+7dnbYHBwejoKCgyX2qqqqQnp6OnJwc0Z9lNBqd2hoMBgC2qadXTj+9Hntbk8mEqjrb73VqSDqGO7myPzwd+6IB+8JZe/WHlOOLCv3/+Z//EXWwu+66S/QHV1dXA4DTTV721/b3rrZo0SI88sgj6NGjh+ifKpYuXYqMjIxG2zdv3gw/Pz/R9drl5uaitEwDQIU9PxfCeFy47j7uLDc3V+4SXAb7ogH7wllb90dzmdkUyVciT5482ex78+fPx44dO0Qdxx64V5+xG43GJsO4qKgIO3fuxPLlyyVUCyxcuBDz5s1zvDYYDAgNDUV8fDwCAgJEH8dkMiE3NxdxcXFYfuhHoKYGMePGYnjvjpLqcRdX9odW69kPkmFfNGBfOGuv/rCPYIghOfT79u3bKgurdenSBYGBgSgpKXHaXlxcjLCwsEbtN27ciJqaGsdF5draWgC2tYA6duyI9957D/369Wu0n06na/TTBABotdoW/SVotVrUmKwAAL2fzuO/2C3tR3fEvmjAvnDW1v0h5diSQ3/06NH4+OOPHa8tFgtOnz6NtWvXYsKECZKOFRsbi8LCQsdrQRBQVFSE9PT0Rm0XLVqERYsWOV6fOHECN910E1599VXExMRI/WPckFoT5+kTkTK16CEqffr0cfwKCwvDhAkT8Prrrzv9YyBGWloaNm7ciCNHjgAAsrOzodFoHE/pGj9+fJP/AMjNaLaFvo6PSiQihZF8pt+/f/8mt5tMJvz222+SjjVq1ChkZWUhOTkZvr6+UKvVyMnJgV6vB2C7ONHULJ0nnngCP/74o+P3AwcOlPwPTktZrQJMFtvFW28NQ5+IlEVy6M+aNavRtsrKShQVFWHUqFGSC0hKSmr2Tt7mnsL16quvSv6c1lJnsTp+7+3F0CciZZGcWl9//TUEQXD8AoCQkBA888wzyMrKau36XE6duSH0dV4c0yciZZF8pp+cnIx//vOfbVGLItTWh75aBWg1nvF4SCJyH5LP9K8V+KtXr76hYpTAPnPHR6vxmGcCE5H7aNEykb///jt++eUXXLp0yenB6JmZmXjwwQdbrThXdGXoExEpjeTQX7ZsGdLT09G5c2f4+/s7vXf1jVbuqLb+xizO0SciJZIc+u+//z7279/f5KMR5Vz5sr0Y68f0dZy5Q0QKJDm5Bg8e3OyzcNeuXXvDBbk6e+hzuiYRKZHk5Hr88cfx1ltv4ezZs07j+QAwbdq0VivMVTnuxmXoE5ECSU4uvV6PN954A6GhofDy8oJGo3H82rZtW1vU6FLs8/R1HNMnIgWSPKY/c+ZMTJ06FcuWLXNaAlkQBDz55JOtWpwrsl/I5ewdIlIiyaHfqVMnLFmypMn3XnnllRsuyNXxQi4RKZnk5Lr99ttx/PjxJt/Lycm54YJcXQ2XVSYiBZN8pn/u3DmMGjUKw4cPR48ePaDRNITfpk2bkJmZ2aoFuhqupU9ESiY59Ddv3uz0LNyrZ/C4u5q6+tD3ZugTkfJIDv277roL7777bpPvecKFXPvwjh9Dn4gUSPKYfnOBDwAvvPDCDRWjBBzTJyIla9UpKFcO+7irKiOHd4hIuSQP74SFhTX7XnFx8Q0VowTV9WP6/roWLVBKRCQrycml0+mQlpbmeG2xWHDmzBls2LABjz76aKsW54o4pk9ESiY59DMyMnDPPfc02v7kk0/ikUceaZWiXJl9yiYflUhESiR5TL+pwAeADh064MiRIzdckKurqV+GgWf6RKREks/0m3okYmVlJXbs2AG12v2XJuCTs4hIySSH/sMPP4zg4GDHa5VKBb1ej2HDhiE7O7tVi3NFHNMnIiWTHPpjxoxBfn5+W9SiCEbHKpvu/1MNEbkfycnlyYEPNJzp80IuESmRqNAvLS3Fc889h+eeew4HDhxo9P6CBQtQWlra6sW5GqvQsLQy5+kTkRKJCv21a9fihRdewKVLl9CxY8dG7x88eBBjx47FmTNnWrs+l1I/sgOAY/pEpEyiQv+LL77AunXr8PLLLyMkJKTR+xs2bMDcuXORkZHR6gW6kvoVGADwISpEpEyikqu6uhpTp069Zps5c+Zg//79rVKUqzLXryKt81JDpVLJWwwRUQuICn0fHx9RB9PpdDdUjKuzD+/wLJ+IlEpUeplMJlit1mu2sVgsqKura5WiXFX9NVx4c+YOESmUqNCPi4vD3//+92u2SU9PR0JCQqsU5aquHN4hIlIiUfMO58+fj4kTJ2LEiBG47777MHDgQHTo0AFVVVU4cOAA1q1bBz8/P+Tm5rZ1vbIyc3iHiBROVOj7+voiPz8fixYtwpIlS2AwGKBSqSAIAgIDA/Hoo49i8eLF8Pb2but6ZWW22i7eajUMfSJSJtF3GPn6+mL58uVYtmwZDh065JizP3DgQI9YaA1oGN7x5pk+ESmU5NtKNRoNBg8e3Ba1uLyGC7kMfSJSJqaXBPYpm3woOhEpFUNfAs7eISKlY3pJ4Lg5i8sqE5FCMb0ksIe+D2/OIiKFYuhLwAu5RKR0TC8JTPXz9DmmT0RKxfSSgPP0iUjpmF4S2Id3eEcuESmVS6TX+vXrERkZiaioKERHR19zXf5169YhPj4ekyZNQmRkJKZPn44TJ060S50WnukTkcLJnl67du1CamoqPvzwQ2zfvh2zZ89GQkICKisrm2yfkpKCp556Cnl5edi5cyd8fX0xZcoUGI3GNq+VF3KJSOlkT6/MzEwkJiYiPDwcgC3UzWYzsrKymmw/depUxxLOarUajz/+OA4fPoyioqI2r9Uxps/hHSJSKNnTKy8vDyNHjnS8VqvVGDFiBLZs2dJk+08++cTptf2pXu1xpm8f3uGYPhEpleQF11pTWVkZDAYDunfv7rQ9ODgYBQUFoo7xww8/ICQkBOPGjWvyfaPR6PQPgsFgAGB7GpjJZBJdq8lkgqV+eEejEiTt647sf35P7weAfXEl9oWz9uoPKceXNfSrq6sBNH62rk6nc7x3LUajES+99BJWrlwJrVbbZJulS5ciIyOj0fbNmzfDz89PUr1mwXaGf3DfXvzn/B5J+7ord39wjhTsiwbsC2dt3R9i8tJO1tC3h+7VQzNGo1FUID/88MO49957kZSU1GybhQsXYt68eY7XBoMBoaGhiI+PR0BAgOhaTSYTXj+QBwAYcdsw3Dm0h+h93ZHJZEJubi7i4uKa/QfXU7AvGrAvnLVXf9hHMMSQNfS7dOmCwMBAlJSUOG0vLi5GWFjYNfdNS0uDn58fnn/++Wu20+l0jX6SAACtViv5L8FSf0eur7f0fd1VS/rRXbEvGrAvnLV1f0g5tuxXJGNjY1FYWOh4LQgCioqKMHny5Gb3yczMxKlTp7By5UoAQGFhodMx2oqJN2cRkcLJnl5paWnYuHEjjhw5AgDIzs6GRqNBamoqAGD8+PFIT093tH/rrbfwf//3f5gzZw6Kioqwe/dubNiwAXv37m3zWh3r6XNpZSJSKFmHdwBg1KhRyMrKQnJyMnx9faFWq5GTkwO9Xg/AdoHCPuZfWVmJxx57DFarFWPHjnU6zgcffNDmtVo4T5+IFE720AeApKSkZi/GXnnTlV6vh8Viaa+yGnGsvcM7colIoZheEvCOXCJSOqaXBLwjl4iUjuklgf2OXC+NSt5CiIhaiKEvgeNMX81uIyJlYnpJ4Ah9L57pE5EyMfRFEgQBFsEW9l480ycihWJ6iWS2Co7fc/YOESkV00skk/0qLji8Q0TKxdAXyWxpONPn8A4RKRXTSySnM31O2SQihWLoi2SqH9P3UqugUjH0iUiZGPoi2c/0eWMWESkZQ18k+5g+l2AgIiVjgolkD30vNc/0iUi5GPoimay24R2e6RORkjHBRDLxTJ+I3ABDXyQzL+QSkRtg6ItkP9PnEgxEpGRMMJHMVg7vEJHyMfRFsljtwzvsMiJSLiaYSPYpmxqe6RORgjH0ReLwDhG5A4a+SBZ76HP2DhEpGENfJPuCaxoutkZECsbQF6nhQi5Dn4iUi6EvUsPaO+wyIlIuJphIJo7pE5EbYOiLZF+GQcszfSJSMCaYSPbZO5ynT0RKxtAXybHKJod3iEjBGPoi2c/0+VB0IlIyhr5I5vopmxzeISIlY+iLZHaM6bPLiEi5mGAiWbj2DhG5AYa+SAx9InIHDH2RuMomEbkDhr5I9jN9NUOfiBSMoS8Sz/SJyB0w9EXimD4RuQOGvkiOKZu8OYuIFIyhL5J9wTUurUxESsYEE4nDO0TkDhj6Ipm5yiYRuQGGvkgNT85i6BORcrlE6K9fvx6RkZGIiopCdHQ09u/f36rtW4PJ/hAVjUt0GRFRi3jJXcCuXbuQmpqKwsJChIeHY/Xq1UhISMDBgweh1+tvuH1rMfNxiUTkBmQ/bc3MzERiYiLCw8MBACkpKTCbzcjKymqV9q3F5Ji9w9AnIuWSPfTz8vIwcuRIx2u1Wo0RI0Zgy5YtrdK+tZgdD1GRvcuIiFpM1uGdsrIyGAwGdO/e3Wl7cHAwCgoKbrg9ABiNRhiNRsdrg8EAADCZTDCZTKJrNZltZ/oQrJL2c1f2PmBfsC+uxL5w1l79IeX4soZ+dXU1AECn0zlt1+l0jvdupD0ALF26FBkZGY22b968GX5+fqJr7WBW4ya9Cr/uLULdCdG7ub3c3Fy5S3AZ7IsG7Atnbd0fzeVfU2QNfXvoXnkmbn/dVCBLbQ8ACxcuxLx58xyvDQYDQkNDER8fj4CAANG1xplMyM3NRVxcHLRarej93JWJ/eHAvmjAvnDWXv1hH8EQQ9bQ79KlCwIDA1FSUuK0vbi4GGFhYTfcHrD9FHD1TwYAoNVqW/SX0NL93BX7owH7ogH7wllb94eUY8t+VTI2NhaFhYWO14IgoKioCJMnT26V9kRE1ED20E9LS8PGjRtx5MgRAEB2djY0Gg1SU1MBAOPHj0d6erro9kRE1DzZb84aNWoUsrKykJycDF9fX6jVauTk5DhutKqurnYaw79eeyIiap7soQ8ASUlJSEpKavK9oqIiSe2JiKh5sg/vEBFR+2HoExF5EIY+EZEHYegTEXkQhj4RkQdh6BMReRCXmLLZngTBtkSylLUqANsaGtXV1TAYDLy9HOyPK7EvGrAvnLVXf9jzzJ5v1+JxoV9ZWQkACA0NlbkSIqLWVVlZicDAwGu2UQli/mlwI1arFWfPnoVer4dKJf4pWPbVOU+dOiVpdU53xf5owL5owL5w1l79IQgCKisrERISArX62qP2Hnemr1ar0atXrxbvHxAQwC/zFdgfDdgXDdgXztqjP653hm/HC7lERB6EoU9E5EEY+iLpdDo8++yzTT6QxROxPxqwLxqwL5y5Yn943IVcIiJPxjN9IiIPwtAnIvIgDH0iIg/C0Bdp/fr1iIyMRFRUFKKjo7F//365S2p3ixcvxrBhwxATE+P4NW3aNLnLald1dXVIS0uDl5cXTpw40ej9t99+GyNGjMC4ceOQmJiIM2fOtH+R7eRafTFjxgyMGTPG6bvy17/+VZ5C28G6desQHx+PSZMmITIyEtOnT3fqE0EQ8Nxzz+G2227DqFGjkJKSgkuXLslTrEDXtXPnTkGv1wu//vqrIAiCsGrVKqFnz56CwWCQubL29eyzzwr5+flylyGb48ePC2PGjBEefPBBAYBw/Phxp/c//fRToUePHkJpaakgCIKQkZEhDBs2TLBYLDJU27au1xepqamNtrkzrVYrbNq0SRAEQbBYLMJ//dd/CQMGDBBqa2sFQRCEl19+WRg6dKhQXV0tCIIgzJw5U/jDH/4gS6080xchMzMTiYmJCA8PBwCkpKTAbDYjKytL3sKoXV2+fBlr1qzBzJkzm3x/yZIlSE1NRdeuXQEAc+fOxb59+7Bx48b2LLNdXK8vPM3UqVORkJAAwHbX/+OPP47Dhw+jqKgIFosFmZmZ+Otf/wpfX18AwPz587Fhwwbs3bu33Wtl6IuQl5eHkSNHOl6r1WqMGDECW7ZskbEqam9DhgxBv379mnzv4sWL+Omnn5y+J4GBgejfv79bfk+u1Ree6JNPPnF67ePjAwAwGo3Ys2cPSktLnb4bgwYNgr+/vyzfDYb+dZSVlcFgMKB79+5O24ODg3H8+HGZqpLP//7v/yImJgbjxo1Damoqjh49KndJLsH+XeD3pMHSpUsRExOD8ePH47HHHkNJSYncJbWbH374ASEhIRg3bhyOHTsGwPm7oVKp0L17d1m+Gwz966iurgaARnfU6XQ6x3ueonfv3hg+fDi2bNmC7du346abbsKIESPc+mKlWPyeOOvfvz8mTJiAb775Bvn5+TAajRgzZgwuX74sd2ltzmg04qWXXsLKlSuh1Wpd7rvB0L8OPz8/ALa/yCsZjUbHe55i1qxZePLJJ+Hl5QW1Wo1FixbBx8cHb7zxhtylyY7fE2f/+Mc/8MADD0CtVkOr1eKVV17ByZMn8dFHH8ldWpt7+OGHce+99yIpKQmA6303GPrX0aVLFwQGBjb60bS4uBhhYWEyVeUaNBoN+vbtyyEewPFd4PekaQEBAQgKCnL770paWhr8/Pzw/PPPO7Y1990oKSmR5bvB0BchNjYWhYWFjteCIKCoqAiTJ0+Wsar2N3fu3Ebbzp49i969e8tQjWvp1KkThg8f7vQ9MRgM+PXXXz3uewI0/q4YjUaUlZW59XclMzMTp06dwsqVKwEAhYWFKCwsxNChQxEUFOT03Th48CCqqqrk+W7IMlFUYXbu3CkEBAQIv/32myAIgrBmzRqPnKfft29f4YsvvnC8fvfddwUfHx/h4MGDMlbV/vLz85udpx8SEiJcuHBBEARBeP755912nr5dc33h7e0tFBQUOF4/88wzQlBQkHD+/Pl2rrB9vPnmm8LgwYOFH374QSgoKBAKCgqEZ599Vvjggw8EQbDN04+IiHDM0589e7Zs8/Q97slZLTFq1ChkZWUhOTkZvr6+UKvVyMnJgV6vl7u0dvXCCy/g1VdfxSuvvIK6ujrodDps2bIFAwcOlLu0dlFXV4f4+HhUVFQAAJKTkxEaGuqYrjdt2jScP38ecXFx8PHxQadOnbBhw4brPr5Oia7XF8uXL3dc/6murkZQUBDy8/MRFBQkY9Vto7KyEo899hisVivGjh3r9N4HH3wAAHjyySdx+fJljBs3Dl5eXggPD8fq1avlKJdLKxMReRL3OwUhIqJmMfSJiDwIQ5+IyIMw9ImIPAhDn4jIgzD0iYg8CEOfiMiDMPTJo911113Q6XTo3bs35syZ49j+ww8/QKVS4bfffnNse+aZZ9CrVy9ERkbiwIEDzR7zzJkz6N69u6TVR1euXImBAweib9++12z3+eef4/PPPxd9XKKrMfTJo3311VeYMGEChg8fjn/961+O7Xl5eQCAb775xrFtyZIlGDZsGLZu3Ypbbrml2WP6+PhgwIABjqckifG3v/0NaWlp123H0KcbxdAnjxcbG4tvv/0WFovFse27777D7bff7gh/ADCZTDCZTPD397/m8bp06YJvv/0WnTt3brOaiVqKoU8eLzY2FhUVFSgqKgIA1NbWwmw2449//CPy8/NhX6lk586dGD16NADgpZdewrBhwxAdHY3o6Ghs374dgO2xiTExMfDx8XF6hnJxcTHuvPNO9O/fH3FxccjOzoZKpcKwYcPw73//26merKws3HHHHejXrx8yMzMd2xcsWIBNmzZh06ZNiImJwdSpU9uyW8hNccE18ngjR45EQEAA8vLyEBkZiR07dmDs2LGIjY1FWloa9uzZg4iICHzzzTeIjY3FO++8g/fffx8//vgjOnbsiB07dmDSpEk4dOgQ+vTpg61btzYam58xYwZ8fHxw6NAhqNVqx9LDr776KmJiYhztSkpKoFKp8PXXX2Pfvn0YOnQopk+fjptvvhkvvvgizp8/DwBO/6AQScEzffJ4Go3G8Wg/wDaOP2nSJNx2220IDAx0DPH8+OOPGDt2LF544QX893//Nzp27AgAuP3229GvXz+89957TR7/8OHDyMnJwdy5cx0rbj7++ONNthUEAQ888AAA28PHO3bsiD179rTmH5c8HEOfCLYhnu+//x51dXWOcNdoNIiOjkZeXh5qa2uhVqtRV1eHkydP4oMPPkBMTIzjl8lkQmVlZZPHPnToEAA4PSWpuYeJBAUFwcur4QfwgIAAGAyGVvyTkqfj8A4RbKFfXV2N3NxcaLVax0OsY2NjsWjRImzbtg233367o/38+fMxc+bMFn+eSqVqcrtGo2m0jaufU2vimT4RgKFDh6Jr167IyMjAhAkTHNtjY2NRWVmJZcuWITY2Fnq9Hr1798bhw4ed9l+7di0+/fTTJo9tf8jMsWPHHNtOnjzZojqvfCBLdXW104wjIjEY+kSwnXnHxMSgoKAAsbGxju1DhgxBt27dsHv3bowcORIAkJ6ejlWrVjmCu7S0FBkZGRgyZEiTxx4wYAASEhKwYsUKWK1WAMA777zTojqDgoJQXl4OAPjzn//sGDoiEouhT1QvNjYWAQEBjnAHGv4xmDBhgmOs/aGHHsLTTz+NKVOmICoqCtOnT8err76KAQMGOKZsFhcXIzMz0/GQ7KysLBiNRgwcOBBTpkxxPFZPq9U63s/MzERxcTHi4+MBAHfccYfjOGvWrAEAzJw5E8eOHUNUVBS6du2KwYMHt1v/kHvg4xKJ2kFpaanT82HPnj2Lnj174vTp0+jZs6eMlZGn4Zk+UTt49NFHsW3bNsfr119/HTExMQx8anecvUPUDqZOnYr58+ejQ4cOMBqN6NOnDz766CO5yyIPxOEdIiIPwuEdIiIPwtAnIvIgDH0iIg/C0Cci8iAMfSIiD8LQJyLyIAx9IiIPwtAnIvIgDH0iIg/y/wH1IXyEWB+f1AAAAABJRU5ErkJggg==", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "plot_cumulative_density(pd.read_csv('resources/grn_models/scglue.csv').weight)" + "if True: \n", + " # job_ids_dict_hvg = { \n", + " # 'portia': 7744548,\n", + " # 'grnboost2': 7742249,\n", + " # 'scenic': 7742283,\n", + " # 'genie3': 7742285,\n", + " # 'ppcor': 7742364,\n", + " # 'scglue': 7742343,\n", + " # }\n", + "\n", + " job_ids_dict = { \n", + " 'portia': 7751292,\n", + " 'grnboost2': 7747906,\n", + " 'scenic': 7748219,\n", + " 'ppcor': 7748321,\n", + " 'scglue': [7756286, 7756675],\n", + " 'scenicplus': [7761874, 7760439, 7760554],\n", + " 'figr': 7756664,\n", + " 'celloracle': 7761872\n", + " }\n", + " \n", + " df_res = process_trace_local(job_ids_dict)\n", + " df_res = df_res[['Elapsed', 'MaxVMSize']]\n", + " granie = pd.DataFrame({'Elapsed': 3643.337/60/60, 'MaxVMSize': 41},index=['granie'])\n", + " df_res = pd.concat([df_res, granie], axis=0)\n", + " df_res.columns = ['Duration (hour)', 'Peak memory (GB)']\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Format resourcs used" + "## Merge scores with resources" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "df_scores = pd.read_csv(f\"resources/scores/50000-skeleton_True-binarize_True_pearson-ridge.csv\", index_col=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -756,186 +729,15 @@ " \n", " \n", " \n", - " JobID\n", - " JobName\n", - " AllocCPUS\n", - " Elapsed\n", - " State\n", - " MaxRSS\n", - " MaxVMSize\n", - " \n", - " \n", - " \n", - " \n", - " portia\n", - " 7744548.bat+\n", - " batch\n", - " 20\n", - " 0.153611\n", - " COMPLETED\n", - " 5.854904\n", - " 6.284901\n", - " \n", - " \n", - " grnboost2\n", - " 7742249.bat+\n", - " batch\n", - " 20\n", - " 1.568056\n", - " COMPLETED\n", - " 3.067471\n", - " 3.563801\n", - " \n", - " \n", - " scenic\n", - " 7742283.bat+\n", - " batch\n", - " 20\n", - " 1.908056\n", - " COMPLETED\n", - " 30.356461\n", - " 32.573463\n", - " \n", - " \n", - " genie3\n", - " 7742285.bat+\n", - " batch\n", - " 20\n", - " 16.682500\n", - " COMPLETED\n", - " 13.105103\n", - " 13.563530\n", - " \n", - " \n", - " ppcor\n", - " 7742364.bat+\n", - " batch\n", - " 20\n", - " 0.556667\n", - " COMPLETED\n", - " 3.909119\n", - " 4.283978\n", - " \n", - " \n", - " scglue\n", - " 7742343.bat+\n", - " batch\n", - " 20\n", - " 4.380278\n", - " FAILED\n", - " 29.917423\n", - " 35.933720\n", - " \n", - " \n", - "\n", - "" - ], - "text/plain": [ - " JobID JobName AllocCPUS Elapsed State MaxRSS \\\n", - "portia 7744548.bat+ batch 20 0.153611 COMPLETED 5.854904 \n", - "grnboost2 7742249.bat+ batch 20 1.568056 COMPLETED 3.067471 \n", - "scenic 7742283.bat+ batch 20 1.908056 COMPLETED 30.356461 \n", - "genie3 7742285.bat+ batch 20 16.682500 COMPLETED 13.105103 \n", - "ppcor 7742364.bat+ batch 20 0.556667 COMPLETED 3.909119 \n", - "scglue 7742343.bat+ batch 20 4.380278 FAILED 29.917423 \n", - "\n", - " MaxVMSize \n", - "portia 6.284901 \n", - "grnboost2 3.563801 \n", - "scenic 32.573463 \n", - "genie3 13.563530 \n", - "ppcor 4.283978 \n", - "scglue 35.933720 " - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "if False: # HVGs: extract resources local jobs\n", - " job_ids_dict_hvg = {\n", - " 'portia': 7744548,\n", - " 'grnboost2': 7742249,\n", - " 'scenic': 7742283,\n", - " 'genie3': 7742285,\n", - " 'ppcor': 7742364,\n", - " 'scglue': 7742343,\n", - " }\n", - " \n", - " df_resources = process_trace_local(job_ids_dict_hvg)\n", - " df_resources\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Merge scores with resources" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: 'resources/results/scores/d0_hvg/scgen_pearson-ridge.csv'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[20], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m df_res \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mresources/results/trace/trace_hvg.csv\u001b[39m\u001b[38;5;124m'\u001b[39m, index_col\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m)\n\u001b[0;32m----> 2\u001b[0m df_scores \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mresources/results/scores/d0_hvg/scgen_pearson-ridge.csv\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex_col\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1026\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 1013\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m 1014\u001b[0m dialect,\n\u001b[1;32m 1015\u001b[0m delimiter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1022\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[1;32m 1023\u001b[0m )\n\u001b[1;32m 1024\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m-> 1026\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:620\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 617\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 619\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 620\u001b[0m parser \u001b[38;5;241m=\u001b[39m \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 622\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[1;32m 623\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n", - "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1620\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1617\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 1619\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1620\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1880\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1878\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[1;32m 1879\u001b[0m mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1880\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1881\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1882\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1883\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1884\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1885\u001b[0m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmemory_map\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1886\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1887\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding_errors\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstrict\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1888\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstorage_options\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1889\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1890\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1891\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n", - "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/common.py:873\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 868\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 869\u001b[0m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[1;32m 870\u001b[0m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[1;32m 871\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[1;32m 872\u001b[0m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[0;32m--> 873\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 874\u001b[0m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 875\u001b[0m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 876\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 877\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 878\u001b[0m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 879\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 880\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 881\u001b[0m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[1;32m 882\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n", - "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'resources/results/scores/d0_hvg/scgen_pearson-ridge.csv'" - ] - } - ], - "source": [ - "df_res = pd.read_csv('resources/results/trace/trace_hvg.csv', index_col=0)\n", - "df_scores = pd.read_csv('resources/results/scores/d0_hvg/scgen_pearson-ridge.csv', index_col=0)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -944,9 +746,10 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -955,121 +758,144 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
method_nameS1S2static-theta-0.0static-theta-0.5overall_scorePeak memory (GB)Duration (hour)method_nameS1S2static-theta-0.0static-theta-0.5static-theta-1.0overall_scoreDuration (hour)Peak memory (GB)
collectri0.0000000.0000000.5008090.0868110.1469050.1620180.2201720.2259040.1216190.0000000.000000
negative_control0.0000000.0000000.0000000.0000000.0000000.0821880.1702470.2243940.0953660.0000000.000000
2positive_control0.5173561.0000000.5716810.7635900.7131574.9000000.0752781.0000000.8425570.7005050.4824370.8051000.0000000.000000
3pearson_corr0.7069730.8798840.9464180.3545230.7219490.9751000.0725000.7982990.8408020.8424040.6878430.4612080.7261110.0000000.000000
4portia0.3908880.3926040.4358630.1173720.3341825.8549040.1536110.4006030.4979540.6391610.3389810.2754280.4304252.49111155.685230
5ppcor0.0599570.1625840.3427330.0381180.1508483.9091190.5566670.0000000.0000000.3479920.2354170.2162360.15992913.42583364.136433
6genie30.9779760.8471630.9525970.6849180.86566313.10510316.682500grnboost20.9716330.7802250.8958900.9462680.7821020.8752237.5105567.378796
7grnboost21.0000000.7944751.0000001.0000000.9486193.0674711.568056scenic0.4640320.3973200.7838950.7450780.8521670.64849824.00861135.954300
8scenic0.3872440.3709150.6911140.6627500.52800630.3564611.908056granie0.2296660.1775680.0000000.0000000.0000000.0814471.01203841.000000
9scglue0.2055180.4126630.4313590.2049130.31361329.9174234.3802780.1903050.5094880.6680410.3075020.2848990.39204711.09750061.677879
10celloracle0.5692350.5380770.7571830.7195050.64600014.9000001.4722220.6167080.4837580.8858440.8210300.6853140.6985313.76500041.601166
11pearson_causal0.0000000.0000000.0000000.0000000.0000000.9747000.064167figr0.3400160.3224620.3050830.3785100.3497870.3391726.731667225.208725
12scenicplus0.9652460.7076351.0000001.0000001.0000000.93457611.740556131.342854
\n", @@ -1077,35 +903,37 @@ ], "text/plain": [ " method_name S1 S2 static-theta-0.0 static-theta-0.5 \\\n", - "0 collectri 0.000000 0.000000 0.500809 0.086811 \n", - "1 negative_control 0.000000 0.000000 0.000000 0.000000 \n", - "2 positive_control 0.517356 1.000000 0.571681 0.763590 \n", - "3 pearson_corr 0.706973 0.879884 0.946418 0.354523 \n", - "4 portia 0.390888 0.392604 0.435863 0.117372 \n", - "5 ppcor 0.059957 0.162584 0.342733 0.038118 \n", - "6 genie3 0.977976 0.847163 0.952597 0.684918 \n", - "7 grnboost2 1.000000 0.794475 1.000000 1.000000 \n", - "8 scenic 0.387244 0.370915 0.691114 0.662750 \n", - "9 scglue 0.205518 0.412663 0.431359 0.204913 \n", - "10 celloracle 0.569235 0.538077 0.757183 0.719505 \n", - "11 pearson_causal 0.000000 0.000000 0.000000 0.000000 \n", + "0 collectri 0.000000 0.000000 0.162018 0.220172 \n", + "1 negative_control 0.000000 0.000000 0.082188 0.170247 \n", + "2 positive_control 1.000000 1.000000 0.842557 0.700505 \n", + "3 pearson_corr 0.798299 0.840802 0.842404 0.687843 \n", + "4 portia 0.400603 0.497954 0.639161 0.338981 \n", + "5 ppcor 0.000000 0.000000 0.347992 0.235417 \n", + "6 grnboost2 0.971633 0.780225 0.895890 0.946268 \n", + "7 scenic 0.464032 0.397320 0.783895 0.745078 \n", + "8 granie 0.229666 0.177568 0.000000 0.000000 \n", + "9 scglue 0.190305 0.509488 0.668041 0.307502 \n", + "10 celloracle 0.616708 0.483758 0.885844 0.821030 \n", + "11 figr 0.340016 0.322462 0.305083 0.378510 \n", + "12 scenicplus 0.965246 0.707635 1.000000 1.000000 \n", "\n", - " overall_score Peak memory (GB) Duration (hour) \n", - "0 0.146905 0.000000 0.000000 \n", - "1 0.000000 0.000000 0.000000 \n", - "2 0.713157 4.900000 0.075278 \n", - "3 0.721949 0.975100 0.072500 \n", - "4 0.334182 5.854904 0.153611 \n", - "5 0.150848 3.909119 0.556667 \n", - "6 0.865663 13.105103 16.682500 \n", - "7 0.948619 3.067471 1.568056 \n", - "8 0.528006 30.356461 1.908056 \n", - "9 0.313613 29.917423 4.380278 \n", - "10 0.646000 14.900000 1.472222 \n", - "11 0.000000 0.974700 0.064167 " + " static-theta-1.0 overall_score Duration (hour) Peak memory (GB) \n", + "0 0.225904 0.121619 0.000000 0.000000 \n", + "1 0.224394 0.095366 0.000000 0.000000 \n", + "2 0.482437 0.805100 0.000000 0.000000 \n", + "3 0.461208 0.726111 0.000000 0.000000 \n", + "4 0.275428 0.430425 2.491111 55.685230 \n", + "5 0.216236 0.159929 13.425833 64.136433 \n", + "6 0.782102 0.875223 7.510556 7.378796 \n", + "7 0.852167 0.648498 24.008611 35.954300 \n", + "8 0.000000 0.081447 1.012038 41.000000 \n", + "9 0.284899 0.392047 11.097500 61.677879 \n", + "10 0.685314 0.698531 3.765000 41.601166 \n", + "11 0.349787 0.339172 6.731667 225.208725 \n", + "12 1.000000 0.934576 11.740556 131.342854 " ] }, - "execution_count": 14, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1135,7 +963,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -1155,11 +983,11 @@ "\u001b[36mℹ\u001b[39m Please use `whereami::thisfile()` instead. \n", "\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[1m\u001b[22mNew names:\n", "\u001b[36m•\u001b[39m `` -> `...1`\n", - "\u001b[1mRows: \u001b[22m\u001b[34m12\u001b[39m \u001b[1mColumns: \u001b[22m\u001b[34m13\u001b[39m\n", + "\u001b[1mRows: \u001b[22m\u001b[34m13\u001b[39m \u001b[1mColumns: \u001b[22m\u001b[34m14\u001b[39m\n", "\u001b[36m──\u001b[39m \u001b[1mColumn specification\u001b[22m \u001b[36m────────────────────────────────────────────────────────\u001b[39m\n", "\u001b[1mDelimiter:\u001b[22m \"\\t\"\n", "\u001b[31mchr\u001b[39m (1): method_name\n", - "\u001b[32mdbl\u001b[39m (12): ...1, S1, S2, static-theta-0.0, static-theta-0.5, overall_score, P...\n", + "\u001b[32mdbl\u001b[39m (13): ...1, S1, S2, static-theta-0.0, static-theta-0.5, static-theta-1.0...\n", "\n", "\u001b[36mℹ\u001b[39m Use `spec()` to retrieve the full column specification for this data.\n", "\u001b[36mℹ\u001b[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.\n", @@ -1188,8 +1016,8 @@ ], "source": [ "\n", - "summary_file = \"output/summary_d0_hvg.tsv\"\n", - "summary_figure = \"output/summary_d0_hvg_figure.pdf\"\n", + "summary_file = \"output/summary.tsv\"\n", + "summary_figure = \"output/summary_figure.pdf\"\n", "\n", "df_all['memory_log'] = np.log(df_all['Peak memory (GB)']+1)\n", "df_all['memory_log'] = np.max(df_all['memory_log'])-df_all['memory_log']\n", @@ -1201,1373 +1029,1036 @@ "df_all[\"duration_str\"] = df_all['Duration (hour)'].round(1).astype(str)\n", "df_all['memory_str'] = df_all['Peak memory (GB)'].round(1).astype(str)\n", "\n", - "\n", "df_all.to_csv(summary_file, sep='\\t')\n", "\n", - "!Rscript ../grn_benchmark/src/metrics_figure.R {summary_file} {summary_figure}\n" + "!Rscript ../grn_benchmark/src/summary_figure.R {summary_file} {summary_figure}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# All layers" + "# Robustness analysis" ] }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 23, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])" - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Submitted batch job 7765072\n" + ] } ], "source": [ - "np.arange(0, 1, .1)" + "if True:\n", + " !sbatch scripts/sbatch/robustness_analysis.sh # !python src/robustness_analysis/script_all.py\n", + "base_dir = 'resources/results/robustness_analysis'" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def format_robustness_results(base_dir, noise_type='net'):\n", + " degrees = [0, 10, 20, 50, 100]\n", + " reg1_metric = 'S1'\n", + " reg2_metric = 'static-theta-0.5'\n", + " for i, degree in enumerate(degrees):\n", + " df = pd.read_csv(f'{base_dir}/{noise_type}-{degree}-scores.csv',index_col=0)\n", + " df_reg1 = df.loc[:, [reg1_metric]].rename(columns={reg1_metric:degree})\n", + " df_reg2 = df.loc[:, [reg2_metric]].rename(columns={reg2_metric:degree})\n", + " \n", + " if i == 0:\n", + " reg1_scores = df_reg1\n", + " reg2_scores = df_reg2\n", + " else:\n", + " reg1_scores = pd.concat([reg1_scores, df_reg1], axis=1)\n", + " reg2_scores = pd.concat([reg2_scores, df_reg2], axis=1)\n", + " \n", + " reg1_scores = reg1_scores.T\n", + " reg2_scores = reg2_scores.T\n", + " return reg1_scores, reg2_scores" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Permute net" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# net \n", + "noise_type = 'net'\n", + "reg1_scores, reg2_scores = format_robustness_results(base_dir, noise_type=noise_type)" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 collectrinegative_controlpositive_controlpearson_corrportiappcorgenie3grnboost2scenicscgluecelloraclecollectrinegative_controlpositive_controlpearson_corrportiappcorgrnboost2scenicgraniescgluecelloraclefigrscenicplus
lognorm-0.050548-0.0422340.0821010.0469970.0148560.0030780.0732060.0916190.0688100.0273150.068165
pearson-0.095474-0.0411300.1634700.2175110.1131850.0166860.2959200.2991600.1172410.0618480.171422
seurat_lognorm-0.052159-0.0414320.0871520.0535050.0174660.0037230.0817030.1051080.0792170.0315970.078648
seurat_pearson-0.095343-0.0417470.1747730.2198170.1081110.0166150.2936780.3018320.1227650.0648900.181489
scgen_lognorm-0.059849-0.0418160.1604660.0944110.0554870.0087950.1623190.2197270.1478120.0605720.150424
scgen_pearson-0.100238-0.0393050.1971290.2693790.1489410.0228460.3726410.3810320.1475530.0783090.2168970-0.052885-0.0380530.2854820.2279000.114365-0.0090300.2773840.1328380.0655660.0543290.1777730.0970690.275561
10-0.063194-0.0377030.2583000.2013820.094161-0.0160070.2468580.1173730.0525290.0466150.1590760.0857680.259247
20-0.068023-0.0388260.2328500.1886080.076579-0.0254850.2131480.1035060.0437820.0377510.1459420.0792370.243952
50-0.083635-0.0356010.1805670.1475120.026785-0.0415530.1472660.060212nannannannannan
100-0.084617-0.038591-0.009365-0.045844-0.052908-0.064681-0.057610-0.016075-0.004035-0.045894-0.019244-0.013468-0.006252
\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 17, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "base_dir = 'resources/scores/d0_hvg'\n", - "layers = ['lognorm','pearson', 'seurat_lognorm', 'seurat_pearson', 'scgen_lognorm', 'scgen_pearson']\n", - "reg_type = 'ridge'\n", - "reg1_metric = 'S1'\n", - "reg2_metric = 'static-theta-0.5'\n", - "for i, layer in enumerate(layers):\n", - " df = pd.read_csv(f'{base_dir}/{layer}-{reg_type}.csv',index_col=0)\n", - " df_reg1 = df.loc[:, [reg1_metric]].rename(columns={reg1_metric:layer})\n", - " df_reg2 = df.loc[:, [reg2_metric]].rename(columns={reg2_metric:layer})\n", - " \n", - " if i == 0:\n", - " reg1_scores_layers = df_reg1\n", - " reg2_scores_layers = df_reg2\n", - " else:\n", - " reg1_scores_layers = pd.concat([reg1_scores_layers, df_reg1], axis=1)\n", - " reg2_scores_layers = pd.concat([reg2_scores_layers, df_reg2], axis=1)\n", - " \n", - "reg1_scores_layers = reg1_scores_layers.T\n", - "reg2_scores_layers = reg2_scores_layers.T\n", - "\n", - "reg1_scores_layers.to_csv('../grn_benchmark/results_folder/scores/reg1_scores_layers_hvgs.csv')\n", - "reg2_scores_layers.to_csv('../grn_benchmark/results_folder/scores/reg2_scores_layers_hvgs.csv')\n", - "\n", - "reg1_scores_layers.style.background_gradient()\n" + "reg1_scores.style.background_gradient()" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 collectrinegative_controlpositive_controlpearson_corrportiappcorgrnboost2scenicgraniescgluecelloraclecollectrinegative_controlpositive_controlpearson_corrportiappcorgrnboost2scenicgraniescgluecelloraclefigrscenicplus
lognorm-0.050548-0.0437570.0821010.0620310.0107460.0016110.1185050.0750600.0448870.0359100.069066
pearson-0.095474-0.0436000.1634700.2221770.0823290.0146580.3326260.1300750.0705510.0796710.167634
seurat_lognorm-0.052159-0.0443820.0871520.0688950.0115060.0021320.1355750.0807600.0496400.0396610.080605
seurat_pearson-0.095343-0.0428330.1747730.2266110.0872870.0169140.3398410.1367270.0757730.0797680.174873
scgen_lognorm-0.059849-0.0449360.1604660.1179940.0297850.0057620.2632820.1523830.0908620.0718000.153270
scgen_pearson-0.100238-0.0445740.1971290.2734430.1011100.0179540.4219360.1720850.0832980.0998590.20915100.3149040.2984160.4381870.4345870.3375330.3161300.5013730.4473920.2683670.3356750.4672580.3545420.514954
100.3106070.3007990.4330800.4270720.3375330.3143400.4929750.4399820.2630600.3342090.4535220.3562730.506758
200.3123460.2980230.4282190.4165580.3347520.3073810.4811930.4275510.2683670.3306400.4448360.3514140.498718
500.3084450.3008430.4139750.4018970.3222560.3005370.4392320.399823nannannannannan
1000.2957970.3032410.3790010.3450880.2919610.2864240.2934210.3171920.3186270.2897040.3105800.3163540.321514
\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 21, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "base_dir = 'resources/scores/'\n", - "layers = ['lognorm','pearson', 'seurat_lognorm', 'seurat_pearson', 'scgen_lognorm', 'scgen_pearson']\n", - "reg_type = 'ridge'\n", - "reg1_metric = 'S1'\n", - "reg2_metric = 'static-theta-0.5'\n", - "for i, layer in enumerate(layers):\n", - " df = pd.read_csv(f'{base_dir}/{layer}-{reg_type}.csv',index_col=0)\n", - " df_reg1 = df.loc[:, [reg1_metric]].rename(columns={reg1_metric:layer})\n", - " df_reg2 = df.loc[:, [reg2_metric]].rename(columns={reg2_metric:layer})\n", - " \n", - " if i == 0:\n", - " reg1_scores_layers = df_reg1\n", - " reg2_scores_layers = df_reg2\n", - " else:\n", - " reg1_scores_layers = pd.concat([reg1_scores_layers, df_reg1], axis=1)\n", - " reg2_scores_layers = pd.concat([reg2_scores_layers, df_reg2], axis=1)\n", - " \n", - "reg1_scores_layers = reg1_scores_layers.T\n", - "reg2_scores_layers = reg2_scores_layers.T\n", - "\n", - "reg1_scores_layers.to_csv('../grn_benchmark/results_folder/scores/reg1_scores_layers.csv')\n", - "reg2_scores_layers.to_csv('../grn_benchmark/results_folder/scores/reg2_scores_layers.csv')\n", - "\n", - "reg1_scores_layers.style.background_gradient()\n" + "reg2_scores.style.background_gradient()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Robustness analysis" + "## Permute sign" ] }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 12, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Submitted batch job 7752429\n" - ] - } - ], - "source": [ - "# !python src/robustness_analysis/script_all.py\n", - "if True:\n", - " !sbatch scripts/sbatch/robustness_analysis.sh" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Permute" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: 'resources/results/robustness_analysis/net-0-scores.csv'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 8\u001b[0m\n\u001b[1;32m 6\u001b[0m reg2_metric \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstatic-theta-0.5\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, degree \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(degrees):\n\u001b[0;32m----> 8\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mbase_dir\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mnoise_type\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m-\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mdegree\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m-scores.csv\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43mindex_col\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 9\u001b[0m df_reg1 \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mloc[:, [reg1_metric]]\u001b[38;5;241m.\u001b[39mrename(columns\u001b[38;5;241m=\u001b[39m{reg1_metric:degree})\n\u001b[1;32m 10\u001b[0m df_reg2 \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mloc[:, [reg2_metric]]\u001b[38;5;241m.\u001b[39mrename(columns\u001b[38;5;241m=\u001b[39m{reg2_metric:degree})\n", - "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1026\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 1013\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m 1014\u001b[0m dialect,\n\u001b[1;32m 1015\u001b[0m delimiter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1022\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[1;32m 1023\u001b[0m )\n\u001b[1;32m 1024\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m-> 1026\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:620\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 617\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 619\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 620\u001b[0m parser \u001b[38;5;241m=\u001b[39m \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 622\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[1;32m 623\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n", - "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1620\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1617\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 1619\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1620\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1880\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1878\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[1;32m 1879\u001b[0m mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1880\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1881\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1882\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1883\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1884\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1885\u001b[0m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmemory_map\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1886\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1887\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding_errors\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstrict\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1888\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstorage_options\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1889\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1890\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1891\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n", - "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/common.py:873\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 868\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 869\u001b[0m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[1;32m 870\u001b[0m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[1;32m 871\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[1;32m 872\u001b[0m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[0;32m--> 873\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 874\u001b[0m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 875\u001b[0m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 876\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 877\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 878\u001b[0m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 879\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 880\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 881\u001b[0m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[1;32m 882\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n", - "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'resources/results/robustness_analysis/net-0-scores.csv'" - ] - } - ], + "outputs": [], "source": [ - "# net \n", - "noise_type = 'net'\n", - "base_dir = 'resources/results/robustness_analysis'\n", - "degrees = [0, 10, 20, 50, 100]\n", - "reg1_metric = 'S1'\n", - "reg2_metric = 'static-theta-0.5'\n", - "for i, degree in enumerate(degrees):\n", - " df = pd.read_csv(f'{base_dir}/{noise_type}-{degree}-scores.csv',index_col=0)\n", - " df_reg1 = df.loc[:, [reg1_metric]].rename(columns={reg1_metric:degree})\n", - " df_reg2 = df.loc[:, [reg2_metric]].rename(columns={reg2_metric:degree})\n", - " \n", - " if i == 0:\n", - " reg1_scores_layers = df_reg1\n", - " reg2_scores_layers = df_reg2\n", - " else:\n", - " reg1_scores_layers = pd.concat([reg1_scores_layers, df_reg1], axis=1)\n", - " reg2_scores_layers = pd.concat([reg2_scores_layers, df_reg2], axis=1)\n", - " \n", - "reg1_scores_layers = reg1_scores_layers.T\n", - "reg2_scores_layers = reg2_scores_layers.T\n", - "reg1_scores_layers.style.background_gradient()\n" + "noise_type = 'sign'\n", + "reg1_scores, reg2_scores = format_robustness_results(base_dir, noise_type=noise_type)" ] }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 collectrinegative_controlpositive_controlpearson_corrpearson_causalportiappcorgenie3grnboost2scenicscgluecelloraclecollectrinegative_controlpositive_controlpearson_corrportiappcorgrnboost2scenicgraniescgluecelloraclefigrscenicplus
00.5148960.5050020.5746080.5242320.5604900.5180480.5098740.5765800.6090750.5742940.5270760.580147
100.5154060.5056910.5718820.5230670.5555720.5243930.5160360.5708210.6017590.5659420.5308010.573236
200.5111720.5046630.5559540.5261950.5524490.5159560.5152920.5699760.5964150.5616190.5268210.563653
500.5118030.4958540.5529350.5247440.5347640.5160560.5107110.5566610.5819200.5483280.5183660.542993
1000.5055400.5033720.5310050.5137290.5230020.5067600.5132870.5048320.5145850.5040930.5064430.5131580-0.052885-0.0380530.2854820.2279000.114365-0.0090300.2773840.1329160.0655660.0543290.1777730.0970690.275561
10-0.063294-0.0379430.2442530.1759270.068840-0.0264270.2251110.0970680.0431910.0390990.1306930.0661780.232978
20-0.078692-0.0373310.1951380.1439070.030350-0.0415800.1623800.0596980.0238350.0212760.0781840.0490520.183118
50-0.092569-0.034971-0.018533-0.071464-0.036026-0.065349-0.034382-0.023101-0.002446-0.024859-0.027821-0.014739-0.006185
100-0.052885-0.0380530.2854820.2279000.114365-0.0090300.2773840.1329160.0655660.0543290.1777730.0970690.275561
\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 48, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "reg2_scores_layers.style.background_gradient()" + "reg1_scores.style.background_gradient()" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 collectrinegative_controlpositive_controlpearson_corrpearson_causalportiappcorgenie3grnboost2scenicscgluecelloraclecollectrinegative_controlpositive_controlpearson_corrportiappcorgrnboost2scenicgraniescgluecelloraclefigrscenicplus
0-0.100238-0.0437950.4891470.2386640.3552560.1489410.0228460.3726410.3810320.1475530.0783090.216897
10-0.107958-0.0421990.4332460.1706120.2973540.1034480.0185510.3092430.3204760.1058500.0515650.148519
20-0.129616-0.0422580.3831690.1095020.2592010.064159-0.0018100.2030460.2295620.0674770.0269450.080443
50-0.154785-0.040425-0.091824-0.000377-0.188567-0.090789-0.013036-0.114169-0.122291-0.025912-0.010141-0.135087
100-0.100238-0.0437950.4891470.2386640.3552560.1489410.0228460.3726410.3810320.1475530.0783090.21689700.3149040.2984160.4381870.4345870.3433060.3161300.5013730.4473920.2570130.3356750.4672580.3545420.514954
100.3152330.2985460.4381870.4345870.3433060.3161300.5013730.4473920.2570630.3356750.4672580.3545420.514954
200.3152060.2986450.4381870.4345870.3433060.3161300.5013730.4473920.2569430.3356750.4672580.3545420.514954
500.3154450.2990330.4381870.4345870.3433060.3161300.5013730.4473920.2568910.3356750.4672580.3545420.514954
1000.3144120.2985920.4381870.4345870.3433060.3161300.5013730.4473920.2571290.3356750.4672580.3545420.514954
\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 3, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "noise_type = 'sign'\n", - "base_dir = 'resources/results/robustness_analysis'\n", - "degrees = [0, 10, 20, 50, 100]\n", - "reg1_metric = 'S1'\n", - "reg2_metric = 'static-theta-0.5'\n", - "for i, degree in enumerate(degrees):\n", - " df = pd.read_csv(f'{base_dir}/{noise_type}-{degree}-scores.csv',index_col=0)\n", - " df_reg1 = df.loc[:, [reg1_metric]].rename(columns={reg1_metric:degree})\n", - " df_reg2 = df.loc[:, [reg2_metric]].rename(columns={reg2_metric:degree})\n", - " \n", - " if i == 0:\n", - " reg1_scores_layers = df_reg1\n", - " reg2_scores_layers = df_reg2\n", - " else:\n", - " reg1_scores_layers = pd.concat([reg1_scores_layers, df_reg1], axis=1)\n", - " reg2_scores_layers = pd.concat([reg2_scores_layers, df_reg2], axis=1)\n", - " \n", - "reg1_scores_layers = reg1_scores_layers.T\n", - "reg2_scores_layers = reg2_scores_layers.T\n", - "reg1_scores_layers.style.background_gradient()" + "reg2_scores.style.background_gradient()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Permute weight" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 15, "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
 collectrinegative_controlpositive_controlpearson_corrpearson_causalportiappcorgenie3grnboost2scenicscgluecelloracle
00.5148960.5050020.5746080.5242320.5604900.5180480.5098740.5765800.6090750.5742940.5270760.580147
100.5141130.5047080.5746080.5242320.5604900.5180480.5098740.5765800.6090750.5742940.5270760.580147
200.5138570.5058990.5746080.5242320.5604900.5180480.5098740.5765800.6090750.5742940.5270760.580147
500.5120820.5063970.5746080.5242320.5604900.5180480.5098740.5765800.6090750.5742940.5270760.580147
1000.5083490.5082020.5746080.5242320.5604900.5180480.5098740.5765800.6090750.5742940.5270760.580147
\n" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'resources/results/robustness_analysis/weight-20-scores.csv'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[15], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m noise_type \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mweight\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 2\u001b[0m reg1_scores, reg2_scores \u001b[38;5;241m=\u001b[39m \u001b[43mformat_robustness_results\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbase_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnoise_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnoise_type\u001b[49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[7], line 6\u001b[0m, in \u001b[0;36mformat_robustness_results\u001b[0;34m(base_dir, noise_type)\u001b[0m\n\u001b[1;32m 4\u001b[0m reg2_metric \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstatic-theta-0.5\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, degree \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(degrees):\n\u001b[0;32m----> 6\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mbase_dir\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mnoise_type\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m-\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mdegree\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m-scores.csv\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43mindex_col\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 7\u001b[0m df_reg1 \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mloc[:, [reg1_metric]]\u001b[38;5;241m.\u001b[39mrename(columns\u001b[38;5;241m=\u001b[39m{reg1_metric:degree})\n\u001b[1;32m 8\u001b[0m df_reg2 \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mloc[:, [reg2_metric]]\u001b[38;5;241m.\u001b[39mrename(columns\u001b[38;5;241m=\u001b[39m{reg2_metric:degree})\n", + "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1026\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 1013\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m 1014\u001b[0m dialect,\n\u001b[1;32m 1015\u001b[0m delimiter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1022\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[1;32m 1023\u001b[0m )\n\u001b[1;32m 1024\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m-> 1026\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:620\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 617\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 619\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 620\u001b[0m parser \u001b[38;5;241m=\u001b[39m \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 622\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[1;32m 623\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n", + "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1620\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1617\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 1619\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1620\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1880\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1878\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[1;32m 1879\u001b[0m mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1880\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1881\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1882\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1883\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1884\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1885\u001b[0m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmemory_map\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1886\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1887\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding_errors\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstrict\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1888\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstorage_options\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1889\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1890\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1891\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n", + "File \u001b[0;32m~/miniconda3/envs/py10/lib/python3.10/site-packages/pandas/io/common.py:873\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 868\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 869\u001b[0m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[1;32m 870\u001b[0m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[1;32m 871\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[1;32m 872\u001b[0m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[0;32m--> 873\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 874\u001b[0m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 875\u001b[0m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 876\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 877\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 878\u001b[0m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 879\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 880\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 881\u001b[0m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[1;32m 882\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'resources/results/robustness_analysis/weight-20-scores.csv'" + ] } ], "source": [ - "reg2_scores_layers.style.background_gradient()" + "noise_type = 'weight'\n", + "reg1_scores, reg2_scores = format_robustness_results(base_dir, noise_type=noise_type)" ] }, { @@ -2579,7 +2070,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ diff --git a/scripts/sbatch/calculate_scores.sh b/scripts/sbatch/calculate_scores.sh index 9533028d9..40b537003 100644 --- a/scripts/sbatch/calculate_scores.sh +++ b/scripts/sbatch/calculate_scores.sh @@ -5,7 +5,7 @@ #SBATCH --error=logs/%j.err #SBATCH --mail-type=END #SBATCH --mail-user=jalil.nourisa@gmail.com -#SBATCH --mem=64G +#SBATCH --mem=250G #SBATCH --cpus-per-task=20 python src/metrics/script_all.py diff --git a/scripts/sbatch/robustness_analysis.sh b/scripts/sbatch/robustness_analysis.sh index 90c529a6d..a6dbf7883 100644 --- a/scripts/sbatch/robustness_analysis.sh +++ b/scripts/sbatch/robustness_analysis.sh @@ -5,7 +5,9 @@ #SBATCH --error=logs/%j.err #SBATCH --mail-type=END #SBATCH --mail-user=jalil.nourisa@gmail.com -#SBATCH --mem=64G -#SBATCH --cpus-per-task=20 +#SBATCH --mem=250G +#SBATCH --cpus-per-task=20 +# SBATCH --partition=gpu +# SBATCH --gres=gpu:1 python src/robustness_analysis/script_all.py diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml index 8c97254be..89dac561f 100644 --- a/src/api/comp_method.yaml +++ b/src/api/comp_method.yaml @@ -47,6 +47,10 @@ functionality: type: boolean direction: input default: True + - name: --causal + type: boolean + direction: input + default: True test_resources: diff --git a/src/control_methods/pearson_corr/config.vsh.yaml b/src/control_methods/pearson_corr/config.vsh.yaml index 667b88d1c..3f597eb98 100644 --- a/src/control_methods/pearson_corr/config.vsh.yaml +++ b/src/control_methods/pearson_corr/config.vsh.yaml @@ -6,6 +6,11 @@ functionality: info: label: pearson_corr summary: "Baseline based on correlation" + arguments: + - name: --normalize + type: boolean + default: True + direction: input resources: - type: python_script diff --git a/src/control_methods/pearson_corr/script.py b/src/control_methods/pearson_corr/script.py index 3020b57de..4868da533 100644 --- a/src/control_methods/pearson_corr/script.py +++ b/src/control_methods/pearson_corr/script.py @@ -14,22 +14,35 @@ 'normalize': False, 'donor_specific': False, 'temp_dir': 'output/pearson_corr', - 'causal': True} + 'causal': True, + 'normalize': True} ## VIASH END import argparse parser = argparse.ArgumentParser(description="Process multiomics RNA data.") parser.add_argument('--multiomics_rna', type=str, help='Path to the multiomics RNA file') -parser.add_argument('--multiomics_atac', type=str, help='Path to the multiomics RNA file') parser.add_argument('--prediction', type=str, help='Path to the prediction file') -parser.add_argument('--resources_dir', type=str, help='Path to the prediction file') parser.add_argument('--tf_all', type=str, help='Path to the tf_all') parser.add_argument('--num_workers', type=str, help='Number of cores') parser.add_argument('--max_n_links', type=str, help='Number of top links to retain') +parser.add_argument('--causal', action='store_true', help='Enable causal mode') +parser.add_argument('--normalize', action='store_true') + args = parser.parse_args() + if args.multiomics_rna: par['multiomics_rna'] = args.multiomics_rna +if args.causal: + par['causal'] = True +else: + par['causal'] = False + +if args.causal: + par['normalize'] = True +else: + par['normalize'] = False + if args.prediction: par['prediction'] = args.prediction if args.tf_all: @@ -38,9 +51,6 @@ par['num_workers'] = args.num_workers if args.max_n_links: par['max_n_links'] = int(args.max_n_links) -if args.resources_dir: - meta['resources_dir'] = args.resources_dir - os.makedirs(par['temp_dir'], exist_ok=True) import sys @@ -59,9 +69,11 @@ def create_corr_net(par): print(par) print('Read data') adata = ad.read_h5ad(par["multiomics_rna"]) - # - lognorm - sc.pp.normalize_total(adata) - sc.pp.log1p(adata) + if 'normalize' in par: + if par['normalize']: + # - lognorm + sc.pp.normalize_total(adata) + sc.pp.log1p(adata) # - corr gene_names = adata.var_names.to_numpy() grn = corr_net(adata.X, gene_names, par) diff --git a/src/exp_analysis/helper.py b/src/exp_analysis/helper.py index 4ad8b7a32..8d99d0ae7 100644 --- a/src/exp_analysis/helper.py +++ b/src/exp_analysis/helper.py @@ -106,7 +106,7 @@ def cosine_similarity_net(nets_dict, col_name='source', weight_col='weight', fig return cosine_sim_matrix, fig -def jaccard_similarity_net(nets_dict, col_name='link', figsize=(4, 4), title='jaccard Similarity', ax=None): +def jaccard_similarity_net(nets_dict, col_name='link', figsize=(4, 4), title='jaccard Similarity', ax=None, fmt='.02f'): from itertools import combinations import seaborn as sns import matplotlib.pyplot as plt @@ -138,7 +138,7 @@ def jaccard_similarity_net(nets_dict, col_name='link', figsize=(4, 4), title='ja fig, ax = plt.subplots(1, 1, figsize=figsize) else: fig = None - sns.heatmap(jaccard_matrix, annot=True, cmap="coolwarm", xticklabels=nets_names, yticklabels=nets_names, ax=ax) + sns.heatmap(jaccard_matrix, annot=True, cmap="viridis", xticklabels=nets_names, yticklabels=nets_names, ax=ax, fmt=fmt, cbar=None) ax.grid(False) ax.set_title(title) # Rotate x labels for readability @@ -146,7 +146,7 @@ def jaccard_similarity_net(nets_dict, col_name='link', figsize=(4, 4), title='ja return jaccard_matrix, fig -def plot_cumulative_density(data, label='', ax=None, title=None, label=None, s=1, x_label='Weight', **kwdgs): +def plot_cumulative_density(data, ax=None, title=None, label=None, s=3, x_label='Weight', **kwdgs): # Sort the data sorted_data = np.sort(data) # Compute the cumulative density values @@ -159,7 +159,9 @@ def plot_cumulative_density(data, label='', ax=None, title=None, label=None, s=1 ax.set_xlabel(x_label) ax.set_ylabel('Cumulative Density') ax.set_title(title) - ax.grid(True) + ax.grid(True, linewidth=0.2, linestyle='--', color='gray') + for side in ['right', 'top']: + ax.spines[side].set_visible(False) return fig, ax class Connectivity: @@ -224,6 +226,37 @@ def diff_roles(control: pd.DataFrame, sample: pd.DataFrame, critical_change_q_t: # df_distance['critical_change_ps'] = df_distance['ps_distance'] > df_distance['ps_distance'].quantile(critical_change_q_t) # df_distance['critical_change_overall'] = df_distance['overall_distance'] > df_distance['overall_distance'].quantile(critical_change_q_t) return df_distance +def find_peak_intersection(peaks, peaks_ref): + '''Find those peaks_ref intersect with peaks''' + # Convert arrays to structured data (chr, start, end) + def split_peaks(peak_array): + split_data = [] + for p in peak_array: + try: + chr_, range_ = p.split(':') + start, end = map(int, range_.split('-')) + split_data.append((chr_, start, end)) + except ValueError: + continue # Skip malformed peaks + return np.array(split_data, dtype=object) + + peaks_struct = split_peaks(peaks) + peaks_ref_struct = split_peaks(peaks_ref) + + # Optimize with NumPy broadcasting for faster intersection check + intersecting_peaks_ref = [] + + chr_peaks = peaks_struct[:, 0] + start_peaks = peaks_struct[:, 1].astype(int) + end_peaks = peaks_struct[:, 2].astype(int) + + for chr_ref, start_ref, end_ref in peaks_ref_struct: + # Vectorized filtering for chromosome and overlap conditions + mask = (chr_peaks == chr_ref) & (start_peaks <= end_ref) & (end_peaks >= start_ref) + if np.any(mask): + intersecting_peaks_ref.append(f"{chr_ref}:{start_ref}-{end_ref}") + + return intersecting_peaks_ref class Exp_analysis: ''' This class provides functions for explanatory analysis of GRNs @@ -232,8 +265,10 @@ def __init__(self, net, peak_gene_net=None): self.net = net # self.net.weight = minmax_scale(self.net.weight) self.net['link'] = self.net['source'].astype(str) + '_' + self.net['target'].astype(str) - self.peak_gene_net = peak_gene_net + if self.peak_gene_net is not None: + if 'peak' in peak_gene_net.columns: + peak_gene_net.rename(columns={'peak': 'source'}, inplace=True) self.tfs = net.source.unique() self.targets = net.target.unique() # check duplicates @@ -251,7 +286,7 @@ def __init__(self, net, peak_gene_net=None): def plot_centrality_barh(df, title='',ax=None, xlabel='Degree', ylabel='Gene', colors=None): if ax==None: fig, ax = plt.subplots(figsize=(10, 6)) - df['degree'].plot(kind='barh', color='skyblue', ax=ax) # Pass ax to the plot method + df.plot(kind='barh', color='skyblue', ax=ax) # Pass ax to the plot method ax.set_title(title) ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) @@ -293,15 +328,33 @@ def subset_quantile(df, col_name='weight', top_q=0.95, top_n=None, ascending=Fal else: df = df.sort_values(by=col_name, ascending=ascending, key=abs)[:top_n] return df + + def annotate_peaks(self, annotation_df) -> dict[str, float]: '''Annotate peaks with associated regions on genome. ''' if self.peak_gene_net is None: print('Peak gene net is not given. Peak annoation is skipped.') return - peaks = self.peak_gene_net.peak.unique() + # print(self.peak_gene_net) + peaks = self.peak_gene_net.source.unique() peaks = self.format_peak(peaks) - annotation_df = annotation_df[annotation_df.peak.isin(peaks)] + + annotation_peaks = annotation_df.peak.unique() + + flag = False + for peak in peaks: + if peak not in annotation_peaks: + flag = True + + + if flag: + print('Not all peaks in the net is among the annotated ones. Finding the overlap') + peaks = find_peak_intersection(peaks, annotation_df.peak.unique()) + annotation_df = annotation_df[annotation_df.peak.isin(peaks)] + else: + annotation_df = annotation_df[annotation_df.peak.isin(peaks)] + value_counts = annotation_df.annotation.value_counts() sum_values = value_counts.sum() value_ratio = ((value_counts/sum_values)*100).round(1) @@ -343,7 +396,7 @@ def calculate_feature(net, feature='active_sum_weight'): return df -def plot_interactions(interaction_df: pd.DataFrame, min_subset_size=None, min_degree=None, color_map=None) -> plt.Figure: +def plot_interactions(interaction_df: pd.DataFrame, min_subset_size=None, min_degree=None, color_map=None, sort_by='degree') -> plt.Figure: """Upset plot of interactions Args: @@ -359,7 +412,7 @@ def plot_interactions(interaction_df: pd.DataFrame, min_subset_size=None, min_de out_dict = upsetplot.plot(upsetplot.from_indicators(indicators=lambda a: a==True, data=interaction_df), fig=fig, show_counts=True, show_percentages = '{:.0%}', - # sort_by='cardinality', + sort_by=sort_by, #'cardinality' # min_subset_size =".1%", # min interaction to show min_subset_size = min_subset_size, # min interaction to show min_degree=min_degree, diff --git a/src/helper.py b/src/helper.py index f21a3c6e7..11f2ce135 100644 --- a/src/helper.py +++ b/src/helper.py @@ -46,7 +46,6 @@ def calculate_scores(): "sbatch", "scripts/sbatch/calculate_scores.sh" ] - # Print command to verify subprocess.run(command) @@ -84,46 +83,56 @@ def run_grn_seqera(): def run_grn_inference(): # par = { # 'methods': ['portia'], - # 'models_dir': 'resources/grn_models/', - # 'multiomics_rna': 'resources/grn-benchmark/multiomics_rna.h5ad', - # 'multiomics_atac': 'resources/grn-benchmark/multiomics_atac.h5ad', + # 'models_dir': 'resources/grn_models/mccalla/han', + # 'multiomics_rna': 'resources/grn-benchmark/mccalla/inference/han.h5ad', # 'num_workers': 20, - # 'mem': "120GB", - # 'time': "24:00:00" - # } + # 'mem': "250GB", + # 'time': "48:00:00", + # 'max_n_links': 100000, + # 'causal': False, + # 'normalize': False + # } par = { - 'methods': ["pearson_corr", "positive_control", "negative_control"], + 'methods': ['scenicplus'], 'models_dir': 'resources/grn_models/', 'multiomics_rna': 'resources/grn-benchmark/multiomics_rna.h5ad', - 'multiomics_atac': 'resources/grn-benchmark/multiomics_atac.h5ad', + 'multiomics_atac': 'resources/grn-benchmark/multiomics_atac.h5ad', 'num_workers': 20, - 'mem': "120GB", - 'time': "24:00:00" + 'mem': "400GB", + 'time': "48:00:00", + 'causal': False, + 'normalize': True, + 'max_n_links': 100000, } for method in par['methods']: + print(method) par['prediction'] = f"{par['models_dir']}/{method}.csv" # Method arguments method_args = (f"--multiomics_rna {par['multiomics_rna']} " - f"--multiomics_atac {par['multiomics_atac']} " f"--prediction {par['prediction']} " f"--num_workers {par['num_workers']} " - f"--resources_dir src/utils") + f"--max_n_links {par['max_n_links']} ") + if par['causal']: + method_args += f"--causal " # Determine the command based on the method if method in ["pearson_corr", "positive_control", "negative_control"]: + if par['normalize']: + method_args += f"--normalize " command = f"python src/control_methods/{method}/script.py {method_args}" elif method == "celloracle": + method_args += f"--multiomics_atac {par['multiomics_atac']} " command = (f"/home/jnourisa/miniconda3/envs/celloracle/bin/python " f"src/methods/multi_omics/celloracle/script.py {method_args}") elif method in ["grnboost2", "scenic", "genie3"]: command = f"singularity exec ../../images/scenic python src/methods/single_omics/{method}/script.py {method_args}" elif method == 'scglue': + method_args += f"--multiomics_atac {par['multiomics_atac']} " command = f"singularity exec ../../images/scglue python src/methods/multi_omics/{method}/script.py {method_args}" - elif method == 'ppcor': - command = f"singularity exec ../../images/ppcor Rscript src/methods/single_omics/{method}/script.R {method_args}" elif method == 'scenicplus': + method_args += f"--multiomics_atac {par['multiomics_atac']} " command = f"singularity exec ../../images/scenicplus python src/methods/multi_omics/{method}/script.py {method_args}" else: command = f"singularity exec ../../images/{method} python src/methods/single_omics/{method}/script.py {method_args}" @@ -139,11 +148,12 @@ def run_grn_inference(): # Add GPU partition if method is 'scglue' if method == 'scglue': full_tag += ["--partition=gpu", "--gres=gpu:1"] - + if True: + full_tag += ["--partition=gpu", "--gres=gpu:1"] # Run sbatch command try: - # result = subprocess.run(['sbatch'] + full_tag + ['scripts/sbatch/grn_inference.sh', command], check=True, capture_output=True, text=True) - result = subprocess.run(['bash'] + ['scripts/sbatch/grn_inference.sh', command], check=True, capture_output=True, text=True) + result = subprocess.run(['sbatch'] + full_tag + ['scripts/sbatch/grn_inference.sh', command], check=True, capture_output=True, text=True) + # result = subprocess.run(['bash'] + ['scripts/sbatch/grn_inference.sh', command], check=True, capture_output=True, text=True) print(f"Job {method} submitted successfully.") print(result.stdout) # Print the standard output @@ -201,6 +211,31 @@ def marco_data(): # print(f"{cell_type}-{GT}. adata shape: {adata.shape}, GT size: {GT_df.shape}, Gene overlap: {gene_overlap}") # command = f"viash run src/metrics/regression_1/config.vsh.yaml -- --perturbation_data resources_local/mccalla_extended/{cell_type}.h5ad --prediction resources_local/mccalla_extended/{cell_type}_{GT}.csv --layer norm --subsample {subsample} --apply_tf false --tf_all resources/prior/tf_all.csv --max_n_links -1 --verbose 1 --score output/{cell_type}_{GT}.h5ad" # subprocess.run(command, shell=True, check=True) + + + all_gene_names = pd.read_csv('resources/prior/genome_annotation.tsv', sep='\t').Gene.unique().astype(str) + + # adata_names = ['han', 'jackson', 'zhao', 'shalek'] + # post_fixes = ['chipunion','KDunion', 'chipunion_KDUnion_intersect'] + # for name in adata_names: + # print('------- ', name) + # adata = ad.read_h5ad(f'resources/grn-benchmark/mccalla/inference/{name}.h5ad') + # adata.var_names = adata.var_names.str.upper().astype(str) + + # genes = adata.var_names + # print(len(genes),genes.isin(all_gene_names).sum()) + # print(np.setdiff1d(genes, all_gene_names)[0:5]) + # adata.write_h5ad(f'resources/grn-benchmark/mccalla/inference/{name}.h5ad') + + # for post_fix in post_fixes: + # GT = pd.read_csv(f'resources/grn-benchmark/mccalla/evaluation/{name}_{post_fix}.csv') + # GT.source = GT.source.str.upper() + # GT.target = GT.target.str.upper() + # GT.to_csv(f'resources/grn-benchmark/mccalla/evaluation/{name}_{post_fix}.csv') + # tf_genes =set(GT.source) | set(GT.target) + # tf_genes = [name.upper() for name in tf_genes] + # # print(tf_genes) + # print('-- ', post_fix, len(tf_genes), np.intersect1d(list(tf_genes), genes).shape) pass def extract_data(data, reg='reg1', dataset_id='scgen_pearson'): @@ -300,22 +335,39 @@ def elapsed_to_hours(elapsed_str): time = time[0] h, m, s = map(int, time.split(':')) return day*24 + h + m / 60 + s / 3600 + def reformat_data(df_local): + # Remove 'K' and convert to integers + df_local['MaxRSS'] = df_local['MaxRSS'].str.replace('K', '').astype(int) + df_local['MaxVMSize'] = df_local['MaxVMSize'].str.replace('K', '').astype(int) + df_local['Elapsed'] = df_local['Elapsed'].apply(lambda x: (elapsed_to_hours(x))) + + # Convert MaxRSS and MaxVMSize from KB to GB + df_local['MaxRSS'] = df_local['MaxRSS'] / (1024 ** 2) # Convert KB to GB + df_local['MaxVMSize'] = df_local['MaxVMSize'] / (1024 ** 2) # Convert KB to GB + return df_local for i, (name, job_id) in enumerate(job_ids_dict.items()): - df = get_sacct_data(job_id) + if type(job_id)==list: + + for i_sub, job_id_ in enumerate(job_id): + df_ = get_sacct_data(job_id_) + df_ = reformat_data(df_) + if i_sub == 0: + df = df_ + else: + concat_df = pd.concat([df, df_], axis=0) + df['MaxVMSize'] = concat_df['MaxVMSize'].max() + df['MaxRSS'] = concat_df['MaxRSS'].max() + df['Elapsed'] = concat_df['Elapsed'].sum() + else: + df = get_sacct_data(job_id) + df = reformat_data(df) df.index = [name] if i==0: df_local = df else: df_local = pd.concat([df_local, df], axis=0) - # Remove 'K' and convert to integers - df_local['MaxRSS'] = df_local['MaxRSS'].str.replace('K', '').astype(int) - df_local['MaxVMSize'] = df_local['MaxVMSize'].str.replace('K', '').astype(int) - df_local['Elapsed'] = df_local['Elapsed'].apply(lambda x: (elapsed_to_hours(x))) - - # Convert MaxRSS and MaxVMSize from KB to GB - df_local['MaxRSS'] = df_local['MaxRSS'] / (1024 ** 2) # Convert KB to GB - df_local['MaxVMSize'] = df_local['MaxVMSize'] / (1024 ** 2) # Convert KB to GB + return df_local diff --git a/src/methods/multi_omics/celloracle/config.vsh.yaml b/src/methods/multi_omics/celloracle/config.vsh.yaml index bc976fe5f..350fbf2e1 100644 --- a/src/methods/multi_omics/celloracle/config.vsh.yaml +++ b/src/methods/multi_omics/celloracle/config.vsh.yaml @@ -14,6 +14,7 @@ functionality: type: file direction: output default: output/celloracle/base_grn.csv + # - name: --links # type: file # direction: output diff --git a/src/methods/multi_omics/celloracle/main.py b/src/methods/multi_omics/celloracle/main.py index a3307c288..320a418b9 100644 --- a/src/methods/multi_omics/celloracle/main.py +++ b/src/methods/multi_omics/celloracle/main.py @@ -14,8 +14,7 @@ def base_grn(par) -> None: print("Reading atac data") multiomics_atac = ad.read_h5ad(par["multiomics_atac"]) - # genomes_dir = par['temp_dir'] - genomes_dir = None + print("Format peak data") peaks = multiomics_atac.var_names.to_numpy() peaks = [peak.replace(':','_').replace("-",'_') for peak in peaks] @@ -23,18 +22,22 @@ def base_grn(par) -> None: tss_annotated['peak_id'] = tss_annotated['chr'].astype(str)+"_"+tss_annotated['start'].astype(str)+"_"+tss_annotated['end'].astype(str) peak_gene = tss_annotated - print("Install ref genome") - genomepy.install_genome(name="hg38", provider="UCSC", genomes_dir=genomes_dir) + try: + print("Install ref genome") + genomepy.install_genome(name="hg38", provider="UCSC", genomes_dir=None) + except: + print("Couldnt install genome. Will look for the default location") + ref_genome = "hg38" genome_installation = ma.is_genome_installed(ref_genome=ref_genome, - genomes_dir=genomes_dir) + genomes_dir=None) print(ref_genome, "installation: ", genome_installation) print("Instantiate TFinfo object") tfi = ma.TFinfo(peak_data_frame=peak_gene, - ref_genome="hg38", - genomes_dir=genomes_dir) + ref_genome=ref_genome, + genomes_dir=None) print("Motif scan") tfi.scan(fpr=0.05, motifs=None, # If you enter None, default motifs will be loaded. diff --git a/src/methods/multi_omics/celloracle/script.py b/src/methods/multi_omics/celloracle/script.py index da341a7df..c7dfc33f4 100644 --- a/src/methods/multi_omics/celloracle/script.py +++ b/src/methods/multi_omics/celloracle/script.py @@ -6,13 +6,12 @@ ## VIASH START par = { - "multiomics_rna": "resources_test/grn-benchmark/multiomics_rna_d0_hvg.h5ad", - "multiomics_atac": "resources_test/grn-benchmark/multiomics_atac.h5ad", + "multiomics_rna": "resources/grn-benchmark/multiomics_rna.h5ad", + "multiomics_atac": "resources/grn-benchmark/multiomics_atac.h5ad", "base_grn": 'output/celloracle/base_grn.csv', "temp_dir": 'output/celloracle/', "num_workers": 10, - "prediction": "output/celloracle_test.h5ad", -} + "prediction": "output/celloracle.h5ad"} ## VIASH END parser = argparse.ArgumentParser(description="Process multiomics RNA data.") @@ -21,7 +20,8 @@ parser.add_argument('--prediction', type=str, help='Path to the prediction file') parser.add_argument('--resources_dir', type=str, help='Path to the prediction file') parser.add_argument('--tf_all', type=str, help='Path to the tf_all') -parser.add_argument('--num_workers', type=str, help='Number of cores') +parser.add_argument('--num_workers', type=int, help='Number of cores') +parser.add_argument('--max_n_links', type=int) args = parser.parse_args() if args.multiomics_rna: @@ -34,38 +34,25 @@ par['tf_all'] = args.tf_all if args.num_workers: par['num_workers'] = args.num_workers +if args.max_n_links: + par['max_n_links'] = args.max_n_links if args.resources_dir: + meta = {} meta['resources_dir'] = args.resources_dir - - -par['links'] = f"{par['temp_dir']}/links.celloracle.links" - - -import argparse -parser = argparse.ArgumentParser(description="Process multiomics RNA data.") -parser.add_argument('--multiomics_rna', type=str, help='Path to the multiomics RNA file') -parser.add_argument('--prediction', type=str, help='Path to the prediction file') -parser.add_argument('--resources_dir', type=str, help='Path to the prediction file') -parser.add_argument('--tf_all', type=str, help='Path to the tf_all') -parser.add_argument('--num_workers', type=str, help='Number of cores') -args = parser.parse_args() - -if args.multiomics_rna: - par['multiomics_rna'] = args.multiomics_rna -if args.prediction: - par['prediction'] = args.prediction -if args.tf_all: - par['tf_all'] = args.tf_all -if args.num_workers: - par['num_workers'] = args.num_workers - -if args.resources_dir: - meta['resources_dir'] = args.resources_dir - -sys.path.append(meta["resources_dir"]) +try: + meta['resources_dir'] = args.resources_dir +except: + pass from main import main os.makedirs(par['temp_dir'], exist_ok=True) + + +if 'base_grn' not in par: + par['base_grn'] = f"{par['temp_dir']}/base_grn.csv" +if 'links' not in par: + par['links'] = f"{par['temp_dir']}/links.celloracle.links" + prediction = main(par) print('Write output to file', flush=True) diff --git a/src/methods/multi_omics/scenicplus/main.py b/src/methods/multi_omics/scenicplus/main.py index e5d4153cd..f9b568a36 100644 --- a/src/methods/multi_omics/scenicplus/main.py +++ b/src/methods/multi_omics/scenicplus/main.py @@ -333,8 +333,8 @@ def run_cistopic(par): # LDA-based topic modeling print('Run LDA models', flush=True) - # n_topics = [2, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50] - n_topics = [40] #TODO: fix this + n_topics = [2, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50] + # n_topics = [40] #TODO: fix this if os.path.exists(par['MALLET_PATH']): models = run_cgs_models_mallet( cistopic_obj, @@ -703,17 +703,17 @@ def preprocess_rna(par): def snakemake_pipeline(par): import os snakemake_dir = os.path.join(par['temp_dir'], 'scplus_pipeline', 'Snakemake') - # if os.path.exists(snakemake_dir): - # import shutil - # shutil.rmtree(snakemake_dir) + if os.path.exists(snakemake_dir): + import shutil + shutil.rmtree(snakemake_dir) os.makedirs(os.path.join(par['temp_dir'], 'scplus_pipeline'), exist_ok=True) os.makedirs(os.path.join(par['temp_dir'], 'scplus_pipeline', 'temp'), exist_ok=True) pipeline_dir = os.path.join(par['temp_dir'], 'scplus_pipeline') - if not os.path.exists(pipeline_dir): - subprocess.run(['scenicplus', 'init_snakemake', '--out_dir', pipeline_dir]) - print('snake make initialized', flush=True) + # if not os.path.exists(pipeline_dir): + subprocess.run(['scenicplus', 'init_snakemake', '--out_dir', pipeline_dir]) + print('snake make initialized', flush=True) # Load pipeline settings with open(os.path.join(snakemake_dir, 'config', 'config.yaml'), 'r') as f: diff --git a/src/methods/multi_omics/scenicplus/script.py b/src/methods/multi_omics/scenicplus/script.py index 876ecbe7b..d9478ddd7 100644 --- a/src/methods/multi_omics/scenicplus/script.py +++ b/src/methods/multi_omics/scenicplus/script.py @@ -23,6 +23,7 @@ parser.add_argument('--resources_dir', type=str, help='Path to the prediction file') parser.add_argument('--tf_all', type=str, help='Path to the tf_all') parser.add_argument('--num_workers', type=str, help='Number of cores') +parser.add_argument('--max_n_links', type=int) args = parser.parse_args() if args.multiomics_rna: @@ -33,6 +34,8 @@ par['prediction'] = args.prediction if args.tf_all: par['tf_all'] = args.tf_all +if args.max_n_links: + par['max_n_links'] = args.max_n_links if args.num_workers: par['num_workers'] = args.num_workers @@ -42,8 +45,10 @@ meta['resources_dir'] = args.resources_dir par['num_workers'] = int(par['num_workers']) print(par) - -sys.path.append(meta["resources_dir"]) +try: + sys.path.append(meta["resources_dir"]) +except: + pass from main import * @@ -69,22 +74,22 @@ def main(par): par['MALLET_PATH'] = os.path.join(par['temp_dir'], 'Mallet-202108', 'bin', 'mallet') os.makedirs(par['atac_dir'], exist_ok=True) - print('------- download_databases -------') - download_databases(par) - print_memory_usage() - print('------- process_peak -------') - process_peak(par) - print_memory_usage() - print('------- run_cistopic -------') - run_cistopic(par) - print_memory_usage() - print('------- process_topics -------') - process_topics(par) - print_memory_usage() - print('------- preprocess_rna -------') - preprocess_rna(par) - print_memory_usage() - print('------- snakemake_pipeline -------') + # print('------- download_databases -------') + # download_databases(par) + # print_memory_usage() + # print('------- process_peak -------') + # process_peak(par) + # print_memory_usage() + # print('------- run_cistopic -------') + # run_cistopic(par) + # print_memory_usage() + # print('------- process_topics -------') + # process_topics(par) + # print_memory_usage() + # print('------- preprocess_rna -------') + # preprocess_rna(par) + # print_memory_usage() + # print('------- snakemake_pipeline -------') snakemake_pipeline(par) print_memory_usage() print('------- post_process -------') diff --git a/src/methods/multi_omics/scglue/main.py b/src/methods/multi_omics/scglue/main.py index 598788822..5e9d8a7b2 100644 --- a/src/methods/multi_omics/scglue/main.py +++ b/src/methods/multi_omics/scglue/main.py @@ -306,20 +306,20 @@ def main(par): from util import process_links # Load scRNA-seq data - # os.makedirs(par['temp_dir'], exist_ok=True) - # print('----- download_annotation ---- ', flush=True) - # download_annotation(par) - # print('----- download_motifs ---- ', flush=True) - # download_motifs(par) - # print('----- preprocess ---- ', flush=True) - # preprocess(par) - # print('----- training ---- ', flush=True) - # training(par) - # print('----- create_prior ---- ', flush=True) - # create_prior(par) - # print('----- pyscenic_grn ---- ', flush=True) - # pyscenic_grn(par) - # print('----- prune_grn ---- ', flush=True) + os.makedirs(par['temp_dir'], exist_ok=True) + print('----- download_annotation ---- ', flush=True) + download_annotation(par) + print('----- download_motifs ---- ', flush=True) + download_motifs(par) + print('----- preprocess ---- ', flush=True) + preprocess(par) + print('----- training ---- ', flush=True) + training(par) + print('----- create_prior ---- ', flush=True) + create_prior(par) + print('----- pyscenic_grn ---- ', flush=True) + pyscenic_grn(par) + print('----- prune_grn ---- ', flush=True) prune_grn(par) print('Curate predictions', flush=True) pruned_grn = pd.read_csv( diff --git a/src/methods/multi_omics/scglue/script.py b/src/methods/multi_omics/scglue/script.py index 3326ea231..61bd1b722 100644 --- a/src/methods/multi_omics/scglue/script.py +++ b/src/methods/multi_omics/scglue/script.py @@ -42,8 +42,8 @@ if args.num_workers: par['num_workers'] = args.num_workers -if args.resources_dir: - meta['resources_dir'] = args.resources_dir +# if args.resources_dir: +# meta['resources_dir'] = args.resources_dir # get gene annotation par['annotation_file'] = f"{par['temp_dir']}/gencode.v45.annotation.gtf.gz" diff --git a/src/methods/single_omics/grnboost2/script.py b/src/methods/single_omics/grnboost2/script.py index b7cc4ce47..98a06b9ad 100644 --- a/src/methods/single_omics/grnboost2/script.py +++ b/src/methods/single_omics/grnboost2/script.py @@ -84,18 +84,19 @@ def infer_grn(X, par): return network # par['cell_type_specific'] = False -if par['cell_type_specific']: - groups = adata_rna.obs.cell_type - i = 0 - for group in tqdm(np.unique(groups), desc="Processing groups"): - X_sub = X[groups == group, :] - net = infer_grn(X_sub, par) - net['cell_type'] = group - if i==0: - grn = net - else: - grn = pd.concat([grn, net], axis=0).reset_index(drop=True) - i += 1 +if 'cell_type_specific' in par: + if par['cell_type_specific']: + groups = adata_rna.obs.cell_type + i = 0 + for group in tqdm(np.unique(groups), desc="Processing groups"): + X_sub = X[groups == group, :] + net = infer_grn(X_sub, par) + net['cell_type'] = group + if i==0: + grn = net + else: + grn = pd.concat([grn, net], axis=0).reset_index(drop=True) + i += 1 else: grn = infer_grn(X, par) diff --git a/src/methods/single_omics/portia/script.py b/src/methods/single_omics/portia/script.py index 26ffc6a3f..6fc4121b3 100644 --- a/src/methods/single_omics/portia/script.py +++ b/src/methods/single_omics/portia/script.py @@ -26,30 +26,44 @@ import argparse parser = argparse.ArgumentParser(description="Process multiomics RNA data.") parser.add_argument('--multiomics_rna', type=str, help='Path to the multiomics RNA file') -parser.add_argument('--multiomics_atac', type=str, help='Path to the multiomics atac file') parser.add_argument('--prediction', type=str, help='Path to the prediction file') -parser.add_argument('--resources_dir', type=str, help='Path to the prediction file') parser.add_argument('--tf_all', type=str, help='Path to the tf_all') parser.add_argument('--num_workers', type=str, help='Number of cores') +parser.add_argument('--max_n_links', type=str, help='Number of top links to retain') +parser.add_argument('--causal', action='store_true', help='Enable causal mode') +parser.add_argument('--normalize', action='store_true') + args = parser.parse_args() if args.multiomics_rna: par['multiomics_rna'] = args.multiomics_rna +if args.causal: + par['causal'] = True +else: + par['causal'] = False + +if args.causal: + par['normalize'] = True +else: + par['normalize'] = False + if args.prediction: par['prediction'] = args.prediction if args.tf_all: par['tf_all'] = args.tf_all if args.num_workers: par['num_workers'] = args.num_workers - -if args.resources_dir: - meta['resources_dir'] = args.resources_dir +if args.max_n_links: + par['max_n_links'] = int(args.max_n_links) + +os.makedirs(par['temp_dir'], exist_ok=True) +import sys try: sys.path.append(meta["resources_dir"]) except: - meta= { - "resources_dir": 'src/utils/' + meta = { + 'resources_dir': 'src/utils' } sys.path.append(meta["resources_dir"]) from util import process_links diff --git a/src/metrics/regression_2/main.py b/src/metrics/regression_2/main.py index 4f35a29f8..3ef2769b8 100644 --- a/src/metrics/regression_2/main.py +++ b/src/metrics/regression_2/main.py @@ -341,11 +341,11 @@ def main(par: Dict[str, Any]) -> pd.DataFrame: verbose_print(par['verbose'], f'Static approach (theta=0.5):', 3) score_static_median = static_approach(net_matrix, n_features_theta_median, X, groups, gene_names, tf_names, par['reg_type'], n_jobs=par['num_workers'], n_features_dict=n_features_dict, clip_scores=clip_scores) # print(f'Static approach (theta=1):', flush=True) - # score_static_max = static_approach(net_matrix, n_features_theta_max, X, groups, gene_names, tf_names, par['reg_type'], n_jobs=par['num_workers']) + # score_static_max = static_approach(net_matrix, n_features_theta_max, X, groups, gene_names, tf_names, par['reg_type'], n_jobs=par['num_workers'], n_features_dict=n_features_dict, clip_scores=clip_scores) results = { 'static-theta-0.0': [float(score_static_min)], - 'static-theta-0.5': [float(score_static_median)] + 'static-theta-0.5': [float(score_static_median)], # 'static-theta-1.0': [float(score_static_max)], } @@ -363,12 +363,5 @@ def main(par: Dict[str, Any]) -> pd.DataFrame: df_results_store.append(df_results) df_results_concat = pd.concat(df_results_store, axis=0) - df_results_concat.index.name = 'donor_id' - print(df_results_concat.reset_index()) - df_results_mean = df_results_concat.reset_index().groupby('donor_id').mean() - print(df_results_mean) - - - - + df_results_mean = df_results_concat.mean(axis=0).to_frame().T return df_results_mean diff --git a/src/metrics/script_all.py b/src/metrics/script_all.py index 170efb8c7..4472139a0 100644 --- a/src/metrics/script_all.py +++ b/src/metrics/script_all.py @@ -59,30 +59,32 @@ os.makedirs(par['scores_dir'], exist_ok=True) -for max_n_links in [50000, 10000]: - par['max_n_links'] = max_n_links - for apply_skeleton in [False, True]: - par['apply_skeleton'] = apply_skeleton - for layer in par['layers']: - par['layer'] = layer - i = 0 - for method in par['methods']: - print(method) - par['prediction'] = f"{par['models_dir']}/{method}.csv" - if not os.path.exists(par['prediction']): - print(f"{par['prediction']} doesnt exist. Skipped.") - continue - from regression_1.main import main - reg1 = main(par) - from regression_2.main import main - reg2 = main(par) - score = pd.concat([reg1, reg2], axis=1) - score.index = [method] - if i==0: - df_all = score - else: - df_all = pd.concat([df_all, score]) - df_all.to_csv(f"{par['scores_dir']}/{max_n_links}-{apply_skeleton}-{layer}-{par['reg_type']}.csv") - print(df_all) - i+=1 +for binarize in [False, True]: + par['binarize'] = binarize + for max_n_links in [50000, 10000]: + par['max_n_links'] = max_n_links + for apply_skeleton in [True, False]: + par['apply_skeleton'] = apply_skeleton + for layer in par['layers']: + par['layer'] = layer + i = 0 + for method in par['methods']: + print(method) + par['prediction'] = f"{par['models_dir']}/{method}.csv" + if not os.path.exists(par['prediction']): + print(f"{par['prediction']} doesnt exist. Skipped.") + continue + from regression_1.main import main + reg1 = main(par) + from regression_2.main import main + reg2 = main(par) + score = pd.concat([reg1, reg2], axis=1) + score.index = [method] + if i==0: + df_all = score + else: + df_all = pd.concat([df_all, score]) + df_all.to_csv(f"{par['scores_dir']}/{max_n_links}-skeleton_{apply_skeleton}-binarize_{binarize}_{layer}-{par['reg_type']}.csv") + print(df_all) + i+=1 diff --git a/src/metrics/skeleton/script.py b/src/metrics/skeleton/script.py index b2a8c0570..146dd5dc7 100644 --- a/src/metrics/skeleton/script.py +++ b/src/metrics/skeleton/script.py @@ -11,26 +11,13 @@ from ast import literal_eval import requests import torch + + def preprocess(par): print('Reading input files', flush=True) rna = ad.read_h5ad(par['multiomics_rna']) atac = ad.read_h5ad(par['multiomics_atac']) - - rna.layers["counts"] = rna.X.copy() - sc.pp.highly_variable_genes(rna, n_top_genes=2000, flavor="seurat_v3") - sc.pp.normalize_total(rna) - sc.pp.log1p(rna) - sc.pp.scale(rna) - sc.tl.pca(rna, n_comps=100, svd_solver="auto") - sc.pp.neighbors(rna, metric="cosine") - sc.tl.umap(rna) - print('step 1 completed') - - scglue.data.lsi(atac, n_components=100, n_iter=15) - sc.pp.neighbors(atac, use_rep="X_lsi", metric="cosine") - sc.tl.umap(atac) - print('step 2 completed') - + scglue.data.get_gene_annotation( rna, gtf=par['annotation_file'], gtf_by="gene_name" @@ -71,140 +58,74 @@ def preprocess(par): atac.write(f"{par['temp_dir']}/atac.h5ad") nx.write_graphml(guidance, f"{par['temp_dir']}/guidance.graphml.gz") -def training(par): - os.makedirs(f"{par['temp_dir']}/glue", exist_ok=True) - rna = ad.read_h5ad(f"{par['temp_dir']}/rna.h5ad") - atac = ad.read_h5ad(f"{par['temp_dir']}/atac.h5ad") - guidance = nx.read_graphml(f"{par['temp_dir']}/guidance.graphml.gz") - scglue.models.configure_dataset( - rna, "NB", use_highly_variable=True, - use_layer="counts", use_rep="X_pca", use_batch='donor_id', use_cell_type='cell_type' - ) - scglue.models.configure_dataset( - atac, "NB", use_highly_variable=True, - use_rep="X_lsi", use_batch='donor_id', use_cell_type='cell_type' - ) - if False: - guidance_hvf = guidance.subgraph(chain( - rna.var.query("highly_variable").index, - atac.var.query("highly_variable").index - )).copy() - - glue = scglue.models.fit_SCGLUE( - {"rna": rna, "atac": atac}, guidance, - fit_kws={"directory": f"{par['temp_dir']}/glue"} - ) - - glue.save(f"{par['temp_dir']}/glue.dill") - - if True: # consistency score - dx = scglue.models.integration_consistency( - glue, {"rna": rna, "atac": atac}, guidance - ) - dx.to_csv(f"{par['temp_dir']}/consistency_scores.csv") - - rna.obsm["X_glue"] = glue.encode_data("rna", rna) - atac.obsm["X_glue"] = glue.encode_data("atac", atac) - feature_embeddings = glue.encode_graph(guidance) - feature_embeddings = pd.DataFrame(feature_embeddings, index=glue.vertices) - rna.varm["X_glue"] = feature_embeddings.reindex(rna.var_names).to_numpy() - atac.varm["X_glue"] = feature_embeddings.reindex(atac.var_names).to_numpy() - - rna.write(f"{par['rna-emb']}", compression="gzip") - atac.write(f"{par['atac-emb']}", compression="gzip") - nx.write_graphml(guidance, f"{par['guidance.graphml']}") -def peak_tf_gene_connections(par): - ''' Infers gene2peak connections - ''' - print('reload the data') - rna = ad.read_h5ad(f"{par['temp_dir']}/rna-emb.h5ad") - atac = ad.read_h5ad(f"{par['temp_dir']}/atac-emb.h5ad") - guidance = nx.read_graphml(f"{par['temp_dir']}/guidance.graphml.gz") - - rna.var["name"] = rna.var_names - atac.var["name"] = atac.var_names - - genes = rna.var.index - peaks = atac.var.index - features = pd.Index(np.concatenate([rna.var_names, atac.var_names])) - feature_embeddings = np.concatenate([rna.varm["X_glue"], atac.varm["X_glue"]]) - print('Get the skeleton') - - skeleton = guidance.edge_subgraph( + guidance = guidance.edge_subgraph( e for e, attr in dict(guidance.edges).items() if attr["type"] == "fwd" ).copy() - print('reginf') - reginf = scglue.genomics.regulatory_inference( - features, feature_embeddings, - skeleton=skeleton, random_state=0 - ) - print('gene2peak') - gene2peak = reginf.edge_subgraph( - e for e, attr in dict(reginf.edges).items() - if attr["qval"] < 0.1 - ) + sources = [] + targets = [] + for e, attr in dict(guidance.edges).items(): + sources.append(e[1]) + targets.append(e[0]) + df = pd.DataFrame({'source': sources, 'target':targets}) + df.to_csv(par['peak2gene.csv']) + +def get_flank_bed(par): + rna = ad.read_h5ad(par['multiomics_rna']) - scglue.genomics.Bed(atac.var).write_bed(f"{par['temp_dir']}/peaks.bed", ncols=3) - scglue.genomics.write_links( - gene2peak, - scglue.genomics.Bed(rna.var).strand_specific_start_site(), - scglue.genomics.Bed(atac.var), - f"{par['temp_dir']}/gene2peak.links", keep_attrs=["score"] + scglue.data.get_gene_annotation( + rna, gtf=par['annotation_file'], + gtf_by="gene_name" ) - print('this is the motif file: ', par['motif_file']) - motif_bed = scglue.genomics.read_bed(par['motif_file']) - # motif_bed = motif_bed.iloc[:100000, :] #TODO: remove this - # tfs = pd.Index(motif_bed["name"]).intersection(rna.var_names) + rna = rna[:, ~rna.var.chrom.isna()] - print("Generate TF cis-regulatory ranking bridged by ATAC peaks", flush=True) - peak_bed = scglue.genomics.Bed(atac.var.loc[peaks]) - peak2tf = scglue.genomics.window_graph(peak_bed, motif_bed, 0, right_sorted=True) - # peak2tf = peak2tf.edge_subgraph(e for e in peak2tf.edges if e[1] in tfs) + flank_bed = scglue.genomics.Bed(rna.var).strand_specific_start_site().expand(par['flank_length']/2, par['flank_length']/2) + flank_bed.to_csv(par['flank2gene']) - flank_bed = scglue.genomics.Bed(rna.var.loc[genes]).strand_specific_start_site().expand(500, 500) +def skeleton_promotor(par): + '''Creates promotor based skeleton using TF motif data and TSS flank''' + flank_bed = pd.read_csv(par['flank2gene']) + + motif_bed = scglue.genomics.read_bed(par['motif_file']) + flank2tf = scglue.genomics.window_graph(flank_bed, motif_bed, 0, right_sorted=True) - sources = [] targets = [] - for e, attr in dict(gene2peak.edges).items(): - sources.append(e[0]) - targets.append(e[1]) + for e, attr in dict(flank2tf.edges).items(): + sources.append(e[1]) + targets.append(e[0]) df = pd.DataFrame({'source': sources, 'target':targets}) - df.to_csv(par['gene2peak']) + df.to_csv(par['skeleton_promotor_file']) + +def skeleton_peak(par): + '''Creates peak based skeleton using TF motif data''' + atac = ad.read_h5ad(f"{par['temp_dir']}/atac-emb.h5ad") + + print('this is the motif file: ', par['motif_file']) + motif_bed = scglue.genomics.read_bed(par['motif_file']) + + print("Generate TF cis-regulatory ranking bridged by ATAC peaks", flush=True) + skeleton_peak = scglue.genomics.Bed(atac.var) + peak2tf = scglue.genomics.window_graph(peak_bed, motif_bed, 0, right_sorted=True) + peak2tf = peak2tf.edge_subgraph(e for e in peak2tf.edges if e[1] in tfs) sources = [] targets = [] for e, attr in dict(peak2tf.edges).items(): - sources.append(e[0]) - targets.append(e[1]) - df = pd.DataFrame({'source': sources, 'target':targets}) - df.to_csv(par['peak2tf']) + sources.append(e[1]) + targets.append(e[0]) + peak2tf = pd.DataFrame({'source': sources, 'target':targets}) - sources = [] - targets = [] - for e, attr in dict(flank2tf.edges).items(): - sources.append(e[0]) - targets.append(e[1]) - df = pd.DataFrame({'source': sources, 'target':targets}) - df.to_csv(par['flank2tf']) + # merge peak2tf with peak2gene + peak2tf.columns = ['peak', 'source'] -def merge_connections(par): - - gene2peak = pd.read_csv(par['gene2peak'], index_col=0) - gene2peak.columns = ['target', 'peak'] + peak2gene = pd.read_csv('output/skeleton/peak2gene.csv', index_col=0) + peak2gene.columns = ['peak', 'target'] - peak2tf= pd.read_csv(par['peak2tf'], index_col=0) - peak2tf.columns = ['peak', 'source'] + tf2gene = peak2gene.merge(peak2tf, on='peak', how='inner')[['source','target']].drop_duplicates() - flank2tf= pd.read_csv(par['flank2tf'], index_col=0) - flank2tf.columns = ['target', 'source'] - # merge gene2peak and peak2tf - tf2gene = gene2peak.merge(peak2tf, on='peak', how='inner')[['source','target']].drop_duplicates() - # merge flank2tf and tf2gene - tf2gene = pd.concat([tf2gene, flank2tf], axis=0).drop_duplicates() - tf2gene.to_csv(f"{par['tf2gene']}") + tf2gene.to_csv(par['skeleton_peak_file']) if __name__ == '__main__': par = { @@ -212,39 +133,72 @@ def merge_connections(par): 'multiomics_rna': f"resources/grn-benchmark/multiomics_rna.h5ad", 'annotation_file': f"output/db/gencode.v45.annotation.gtf.gz", # 'motif_file': 'output/db/ENCODE-TF-ChIP-hg38.bed.gz', - 'motif_file': 'output/db/jaspar_encode.bed.gz', 'temp_dir': 'output/skeleton', 'extend_range': 150000, - 'tf2gene': 'output/skeleton/tf2gene.csv' + 'skeleton': 'output/skeleton/skeleton.csv' } print(par) os.makedirs(par['temp_dir'], exist_ok=True) par['rna-emb'] = f"{par['temp_dir']}/rna-emb.h5ad" par['atac-emb'] = f"{par['temp_dir']}/atac-emb.h5ad" par['guidance.graphml'] = f"{par['temp_dir']}/guidance.graphml.gz" - - par['gene2peak'] = f"{par['temp_dir']}/gene2peak.csv" - par['peak2tf'] = f"{par['temp_dir']}/peak2tf.csv" - par['flank2tf'] = f"{par['temp_dir']}/flank2tf.csv" + par['peak2gene'] = f"{par['temp_dir']}/peak2gene.csv" + par['flank2gene'] = f"{par['temp_dir']}/flank2gene.csv" - # ---- simplify + + # ----- connect rna to atac -> peak2gene connections if False: - multiomics_atac = ad.read_h5ad(par['multiomics_atac']) - multiomics_atac = multiomics_atac[:, :10000] - - par['multiomics_atac'] = f"{par['temp_dir']}/multiomics_atac.h5ad" - multiomics_atac.write(par['multiomics_atac']) - - # ----- actual runs - # print('------- preprocess ---------') - # preprocess(par) - # print('------- training ---------') - # training(par) - print('------- peak_tf_gene_connections ---------') - peak_tf_gene_connections(par) - print('------- merge_connections ---------') - merge_connections(par) - + print('------- preprocess ---------') + preprocess(par) + print('------- get flank ---------') + get_flank_bed(par) + print('------- promotor based skeleton for different motif files ---------') + names = ['encode','jaspar'] + motif_files = ['output/db/ENCODE-TF-ChIP-hg38.bed.gz', 'output/db/JASPAR2022-hg38.bed.gz'] + if True: + for i, motif_file in enumerate(motif_files): + par['skeleton_promotor_file'] = f"{par['temp_dir']}/skeleton_{names[i]}_promotor.csv" + par['motif_file'] = motif_file + skeleton_promotor(par) + # - merge them + for i in range(len(names)): + df = pd.read_csv(f"{par['temp_dir']}/skeleton_{names[i]}_promotor.csv", index_col=0) + print(df.shape) + if i ==0 : + skeleton = df + else: + skeleton = pd.concat([df, skeleton], axis=0).drop_duplicates() + print(skeleton.shape) + skeleton.to_csv(f"{par['temp_dir']}/skeleton_promotor.csv") + print('------- peak based skeleton for different motif files ---------') + if False: + for i, motif_file in enumerate(motif_files): + par['skeleton_peak_file'] = f"{par['temp_dir']}/skeleton_peak_{names[i]}.csv" + par['motif_file'] = motif_file + skeleton_peak(par) + # - merge them + print('merging peak2tf from different motifs') + for i, name in enumerate(names): + df = pd.read_csv(f"{par['temp_dir']}/skeleton_peak_{names[i]}.csv") + print(df.source.nunique()) + print(df.target.nunique()) + if i ==0 : + skeleton_peak = df + else: + skeleton_peak = pd.concat([df, skeleton_peak], axis=0).drop_duplicates() + skeleton_peak.to_csv(f"{par['temp_dir']}/skeleton_peak.csv") + + print('------- mege peak based skeleton with promotor base skeleton ---------') + # - read peak based and promotor based skeletons + skeleton_peak = pd.read_csv(f"{par['temp_dir']}/skeleton_peak.csv")[['source', 'target']].drop_duplicates() + print(len(skeleton_peak), skeleton_peak.source.nunique(), skeleton_peak.target.nunique()) + skeleton_promotor = pd.read_csv(f"{par['temp_dir']}/skeleton_promotor.csv")[['source', 'target']].drop_duplicates() + print(len(skeleton_promotor), skeleton_promotor.source.nunique(), skeleton_promotor.target.nunique()) + + # - merge and save + skeleton = pd.concat([skeleton_promotor, skeleton_peak], axis=0).drop_duplicates() + print(len(skeleton), skeleton.source.nunique(), skeleton.target.nunique()) + skeleton.to_csv(f"{par['temp_dir']}/skeleton.csv") diff --git a/src/robustness_analysis/script_all.py b/src/robustness_analysis/script_all.py index f7431d266..aeecb75eb 100644 --- a/src/robustness_analysis/script_all.py +++ b/src/robustness_analysis/script_all.py @@ -9,23 +9,26 @@ 'reg_type': 'ridge', 'read_dir': "resources/grn_models/", 'write_dir': "resources/results/robustness_analysis", - 'degrees': [0, 10, 20, 50, 100], + # 'degrees': [0, 10, 20, 50, 100], + 'degrees': [50], 'noise_types': ["net", "sign"], - 'methods': [ 'collectri', 'negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor', 'grnboost2', 'scenic', 'granie', 'scglue', 'celloracle'], - + # 'noise_types': ['weight'], + 'methods': [ 'collectri', 'negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor', 'grnboost2', 'scenic', 'granie', 'scglue', 'celloracle', 'figr', 'scenicplus'], "perturbation_data": "resources/grn-benchmark/perturbation_data.h5ad", "tf_all": "resources/prior/tf_all.csv", "max_n_links": 50000, - "apply_tf": "true", - 'subsample': -2, - 'verbose': 3, - 'binarize': True, + "apply_tf": True, + 'binarize': False, + 'subsample': -1, + 'verbose': 0, 'num_workers': 20, 'consensus': 'resources/prior/consensus-num-regulators.json', 'static_only': True, 'clip_scores': True, - 'layer': 'scgen_pearson', + 'layer': 'pearson', + 'apply_skeleton': True, + 'skeleton': 'resources/prior/skeleton.csv' } meta = { @@ -38,7 +41,6 @@ os.makedirs(par['write_dir'], exist_ok=True) os.makedirs(f"{par['write_dir']}/tmp/", exist_ok=True) - def run_reg(par): from metrics.regression_1.main import main reg1 = main(par)