From bdd6d02c912fee9f1c5eb4e56069b1caf6e2e0dc Mon Sep 17 00:00:00 2001 From: perdo Date: Wed, 6 Mar 2024 11:12:01 +0100 Subject: [PATCH] Added aggregated bin insights to examples --- examples/adm/ADMBinningInsights.ipynb | 481 +++++++++++++++++++++++--- python/docs/Makefile | 2 +- python/docs/source/index.rst | 1 + 3 files changed, 440 insertions(+), 44 deletions(-) diff --git a/examples/adm/ADMBinningInsights.ipynb b/examples/adm/ADMBinningInsights.ipynb index 891d54ae..93abdc01 100644 --- a/examples/adm/ADMBinningInsights.ipynb +++ b/examples/adm/ADMBinningInsights.ipynb @@ -14,8 +14,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 1, + "metadata": { + "nbsphinx": "hidden" + }, "outputs": [], "source": [ "# These lines are only for rendering in the docs, and are hidden through Jupyter tags\n", @@ -34,27 +36,67 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Temporarily add the BinAggregator path explicitly. This is only temp while\n", - "# not part of a released version of PDS tools.\n", - "\n", - "import site\n", - "import os\n", - "import pathlib\n", - "\n", - "notebook_folder = os.getcwd()\n", - "basePath = pathlib.Path(notebook_folder).parent.parent\n", - "site.addsitedir(pathlib.Path(basePath, 'python/pdstools/adm'))\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "from pdstools import ADMDatamart, datasets\n", "import polars as pl\n", @@ -95,9 +137,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "fig = dm.plotPredictorBinning(modelids=['08ca1302-9fc0-57bf-9031-d4179d400493'], predictors=['Customer.AnnualIncome'])\n", "fig.update_layout(height=400, width=800, xaxis_title=\"\")\n", @@ -141,11 +217,45 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "from BinAggregator import BinAggregator\n", + "from pdstools import BinAggregator\n", "\n", "# For PDS tools example keep dm as above but the subset argument is important\n", "dm = datasets.CDHSample(subset=False)\n", @@ -177,18 +287,88 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (4, 9)
PredictorNameBinIndexBinLowerBoundBinUpperBoundBinSymbolLiftBinResponsesBinCoverageModels
stri64f64f64strf64f64f64i32
"Customer.Age"120.035.0"<35.0"0.066667433.3333331.01
"Customer.Age"235.050.0"<50.0"-0.1600.01.01
"Customer.Age"350.065.0"<65.0"2.0240.01.01
"Customer.Age"465.080.0"<80.0"2.0160.00.6666671
" + ], + "text/plain": [ + "shape: (4, 9)\n", + "┌────────────┬──────────┬────────────┬────────────┬───┬──────────┬────────────┬───────────┬────────┐\n", + "│ PredictorN ┆ BinIndex ┆ BinLowerBo ┆ BinUpperBo ┆ … ┆ Lift ┆ BinRespons ┆ BinCovera ┆ Models │\n", + "│ ame ┆ --- ┆ und ┆ und ┆ ┆ --- ┆ es ┆ ge ┆ --- │\n", + "│ --- ┆ i64 ┆ --- ┆ --- ┆ ┆ f64 ┆ --- ┆ --- ┆ i32 │\n", + "│ str ┆ ┆ f64 ┆ f64 ┆ ┆ ┆ f64 ┆ f64 ┆ │\n", + "╞════════════╪══════════╪════════════╪════════════╪═══╪══════════╪════════════╪═══════════╪════════╡\n", + "│ Customer.A ┆ 1 ┆ 20.0 ┆ 35.0 ┆ … ┆ 0.066667 ┆ 433.333333 ┆ 1.0 ┆ 1 │\n", + "│ ge ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ Customer.A ┆ 2 ┆ 35.0 ┆ 50.0 ┆ … ┆ -0.1 ┆ 600.0 ┆ 1.0 ┆ 1 │\n", + "│ ge ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ Customer.A ┆ 3 ┆ 50.0 ┆ 65.0 ┆ … ┆ 2.0 ┆ 240.0 ┆ 1.0 ┆ 1 │\n", + "│ ge ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ Customer.A ┆ 4 ┆ 65.0 ┆ 80.0 ┆ … ┆ 2.0 ┆ 160.0 ┆ 0.666667 ┆ 1 │\n", + "│ ge ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "└────────────┴──────────┴────────────┴────────────┴───┴──────────┴────────────┴───────────┴────────┘" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "myAggregator.combine_two_numbinnings(source, target)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "fig = myAggregator.roll_up(\"Customer.Age\")\n", "fig.update_layout(height=300, width=600)\n", @@ -206,9 +386,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "fig = myAggregator.roll_up(\"Customer.Age\", minimum=20, maximum=80, n=5, aggregation=\"Group\")\n", "fig.update_layout(height=500)\n", @@ -224,9 +438,54 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (20, 10)
PredictorNameBinIndexBinLowerBoundBinUpperBoundBinSymbolLiftBinResponsesBinCoverageModelsGroup
stri64f64f64strf64f64f64i32str
"Customer.Age"120.032.0"<32.0"0.2296434398.94069810.010"AutoLoans"
"Customer.Age"232.044.0"<44.0"-0.2615058280.45049610.010"AutoLoans"
"Customer.Age"344.056.0"<56.0"0.2810454540.74885610.010"AutoLoans"
"Customer.Age"456.068.0"<68.0"0.5011781272.36276810.010"AutoLoans"
"Customer.Age"568.080.0"<80.0"0.510309965.2379268.16666710"AutoLoans"
"Customer.Age"120.032.0"<32.0"0.11059995.7249517.07"HomeLoans"
"Customer.Age"232.044.0"<44.0"-0.0786231975.0713257.07"HomeLoans"
"Customer.Age"344.056.0"<56.0"0.0473131350.1102447.07"HomeLoans"
"Customer.Age"456.068.0"<68.0"0.145987251.3701127.07"HomeLoans"
"Customer.Age"568.080.0"<80.0"0.167526209.4750935.0833337"HomeLoans"
"Customer.Age"120.032.0"<32.0"0.038873247.6767261.01"CreditCards"
"Customer.Age"232.044.0"<44.0"-0.598058324.5798161.01"CreditCards"
"Customer.Age"344.056.0"<56.0"0.99698782.9893791.01"CreditCards"
"Customer.Age"456.068.0"<68.0"1.00769281.3679291.01"CreditCards"
"Customer.Age"568.080.0"<80.0"1.00769267.8066070.8333331"CreditCards"
"Customer.Age"120.032.0"<32.0"0.1553551749.9216282.02"Bundles"
"Customer.Age"232.044.0"<44.0"-0.1247863979.8205782.02"Bundles"
"Customer.Age"344.056.0"<56.0"0.2443972843.3392712.02"Bundles"
"Customer.Age"456.068.0"<68.0"-0.139278502.6503452.02"Bundles"
"Customer.Age"568.080.0"<80.0"-0.209373331.5839841.6666672"Bundles"
" + ], + "text/plain": [ + "shape: (20, 10)\n", + "┌────────────┬──────────┬────────────┬────────────┬───┬───────────┬───────────┬────────┬───────────┐\n", + "│ PredictorN ┆ BinIndex ┆ BinLowerBo ┆ BinUpperBo ┆ … ┆ BinRespon ┆ BinCovera ┆ Models ┆ Group │\n", + "│ ame ┆ --- ┆ und ┆ und ┆ ┆ ses ┆ ge ┆ --- ┆ --- │\n", + "│ --- ┆ i64 ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ i32 ┆ str │\n", + "│ str ┆ ┆ f64 ┆ f64 ┆ ┆ f64 ┆ f64 ┆ ┆ │\n", + "╞════════════╪══════════╪════════════╪════════════╪═══╪═══════════╪═══════════╪════════╪═══════════╡\n", + "│ Customer.A ┆ 1 ┆ 20.0 ┆ 32.0 ┆ … ┆ 4398.9406 ┆ 10.0 ┆ 10 ┆ AutoLoans │\n", + "│ ge ┆ ┆ ┆ ┆ ┆ 98 ┆ ┆ ┆ │\n", + "│ Customer.A ┆ 2 ┆ 32.0 ┆ 44.0 ┆ … ┆ 8280.4504 ┆ 10.0 ┆ 10 ┆ AutoLoans │\n", + "│ ge ┆ ┆ ┆ ┆ ┆ 96 ┆ ┆ ┆ │\n", + "│ Customer.A ┆ 3 ┆ 44.0 ┆ 56.0 ┆ … ┆ 4540.7488 ┆ 10.0 ┆ 10 ┆ AutoLoans │\n", + "│ ge ┆ ┆ ┆ ┆ ┆ 56 ┆ ┆ ┆ │\n", + "│ Customer.A ┆ 4 ┆ 56.0 ┆ 68.0 ┆ … ┆ 1272.3627 ┆ 10.0 ┆ 10 ┆ AutoLoans │\n", + "│ ge ┆ ┆ ┆ ┆ ┆ 68 ┆ ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ Customer.A ┆ 2 ┆ 32.0 ┆ 44.0 ┆ … ┆ 3979.8205 ┆ 2.0 ┆ 2 ┆ Bundles │\n", + "│ ge ┆ ┆ ┆ ┆ ┆ 78 ┆ ┆ ┆ │\n", + "│ Customer.A ┆ 3 ┆ 44.0 ┆ 56.0 ┆ … ┆ 2843.3392 ┆ 2.0 ┆ 2 ┆ Bundles │\n", + "│ ge ┆ ┆ ┆ ┆ ┆ 71 ┆ ┆ ┆ │\n", + "│ Customer.A ┆ 4 ┆ 56.0 ┆ 68.0 ┆ … ┆ 502.65034 ┆ 2.0 ┆ 2 ┆ Bundles │\n", + "│ ge ┆ ┆ ┆ ┆ ┆ 5 ┆ ┆ ┆ │\n", + "│ Customer.A ┆ 5 ┆ 68.0 ┆ 80.0 ┆ … ┆ 331.58398 ┆ 1.666667 ┆ 2 ┆ Bundles │\n", + "│ ge ┆ ┆ ┆ ┆ ┆ 4 ┆ ┆ ┆ │\n", + "└────────────┴──────────┴────────────┴────────────┴───┴───────────┴───────────┴────────┴───────────┘" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "myAggregator.roll_up(\"Customer.Age\", minimum=20, maximum=80, n=5, aggregation=\"Group\", return_df=True)" ] @@ -240,9 +499,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "fig = myAggregator.roll_up(\"Customer.AnnualIncome\", boundaries=[10000, 20000, 30000], n=8, distribution=\"log\", aggregation=\"Channel\")\n", "fig.update_layout(height=300)\n", @@ -267,9 +560,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "fig = myAggregator.roll_up(\"Customer.MaritalStatus\")\n", "fig.update_layout(height=300, width=600)\n", @@ -289,9 +616,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "top_predictors = dm.plotPredictorPerformance(\n", " top_n = 10, \n", @@ -321,9 +682,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "binAggregator = BinAggregator(dm, query=pl.col(\"Group\").cast(pl.Utf8).str.contains(\"Loan\"))\n", "fig = binAggregator.roll_up([\"Customer.Prefix\", \"Customer.Age\"], n=6, aggregation=\"Group\")\n", diff --git a/python/docs/Makefile b/python/docs/Makefile index 19329da1..cc480644 100644 --- a/python/docs/Makefile +++ b/python/docs/Makefile @@ -23,7 +23,7 @@ help: %: Makefile mkdir -p source/articles - cp ../../examples/datamart/Example_ADM_Analysis.ipynb ../../examples/valuefinder/* ../../examples/adm/AGBModelVisualisation.ipynb ../../examples/hds/Example_Data_Anonymization.ipynb ../../examples/articles/*.ipynb source/articles + cp ../../examples/datamart/Example_ADM_Analysis.ipynb ../../examples/valuefinder/* ../../examples/adm/AGBModelVisualisation.ipynb ../../examples/adm/ADMBinningInsights.ipynb ../../examples/hds/Example_Data_Anonymization.ipynb ../../examples/articles/*.ipynb source/articles @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) rm -rf source/articles/output rm source/articles/* diff --git a/python/docs/source/index.rst b/python/docs/source/index.rst index 58350e61..a5307eb9 100644 --- a/python/docs/source/index.rst +++ b/python/docs/source/index.rst @@ -19,6 +19,7 @@ Welcome to the PDS Tools Python documentation :caption: Examples articles/Example_ADM_Analysis + articles/ADMBinningInsights articles/AGBModelVisualisation articles/vf_analysis articles/Example_Data_Anonymization