diff --git a/ci/310-numba.yaml b/ci/310-numba.yaml index 4a1e9c5f..b04be43f 100644 --- a/ci/310-numba.yaml +++ b/ci/310-numba.yaml @@ -7,6 +7,7 @@ dependencies: - scipy - beautifulsoup4 - scikit-learn + - distinctipy # testing - pytest - pytest-cov diff --git a/ci/310.yaml b/ci/310.yaml index a92794a5..762461d6 100644 --- a/ci/310.yaml +++ b/ci/310.yaml @@ -7,6 +7,7 @@ dependencies: - scipy - beautifulsoup4 - scikit-learn + - distinctipy # testing - pytest - pytest-cov diff --git a/ci/311.yaml b/ci/311.yaml index 9348d111..8751112f 100644 --- a/ci/311.yaml +++ b/ci/311.yaml @@ -7,6 +7,7 @@ dependencies: - scipy - beautifulsoup4 - scikit-learn + - distinctipy # testing - pytest - pytest-cov diff --git a/ci/38-numba.yaml b/ci/38-numba.yaml index f7759ab9..6c0f5196 100644 --- a/ci/38-numba.yaml +++ b/ci/38-numba.yaml @@ -7,6 +7,7 @@ dependencies: - scipy - beautifulsoup4 - scikit-learn + - distinctipy # testing - pytest - pytest-cov diff --git a/ci/38.yaml b/ci/38.yaml index c2aee545..38819e39 100644 --- a/ci/38.yaml +++ b/ci/38.yaml @@ -19,3 +19,4 @@ dependencies: - zstd - networkx - libpysal + - distinctipy diff --git a/ci/39-numba.yaml b/ci/39-numba.yaml index 026b441b..0dcb9dfb 100644 --- a/ci/39-numba.yaml +++ b/ci/39-numba.yaml @@ -7,6 +7,7 @@ dependencies: - scipy - beautifulsoup4 - scikit-learn + - distinctipy # testing - pytest - pytest-cov diff --git a/ci/39.yaml b/ci/39.yaml index 10b6eec7..e76c9fa1 100644 --- a/ci/39.yaml +++ b/ci/39.yaml @@ -19,3 +19,4 @@ dependencies: - zstd - networkx - libpysal + - distinctipy diff --git a/mapclassify/__init__.py b/mapclassify/__init__.py index b0772f81..227f356b 100644 --- a/mapclassify/__init__.py +++ b/mapclassify/__init__.py @@ -17,6 +17,7 @@ Percentiles, Quantiles, StdMean, + UniqueValue, UserDefined, gadf, load_example, diff --git a/mapclassify/classifiers.py b/mapclassify/classifiers.py index 60add361..29697d6f 100644 --- a/mapclassify/classifiers.py +++ b/mapclassify/classifiers.py @@ -5,7 +5,11 @@ import 
def unique_value(values):
    """
    Tabulate the distinct values of a categorical variable.

    Parameters
    ----------
    values : array-like
        Observations to classify.

    Returns
    -------
    labels : numpy.ndarray
        Sorted distinct values found in ``values``.
    counts : numpy.ndarray
        Number of occurrences of the corresponding entry of ``labels``.
    """
    labels, counts = np.unique(values, return_counts=True)
    return labels, counts


class UniqueValue:
    """
    Unique value classifier.

    A unique value classifier is appropriate for a categorical
    variable that has a discrete set of unique values. For
    example, a map of US counties where the variable State takes
    on the same value for counties belonging to the same state.

    This classifier creates k unique colors associated with the k
    unique values of the column specified.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Data frame holding the geometries and the column to classify.
    column : str
        Column of data frame with unique values to classify.

    Attributes
    ----------
    name : str
        Always ``"UniqueValue"``.
    y : array-like
        The values of ``gdf[column]``.
    yb : array-like
        Same object as ``y``; the observed value itself acts as the class id.
    classes : numpy.ndarray
        Sorted distinct values of ``y``.
    counts : numpy.ndarray
        Number of observations in each entry of ``classes``.
    colors : list
        One color per class, as returned by ``distinctipy.get_colors``.
    column : str
        Name of the classified column.
    gdf : geopandas.GeoDataFrame
        The data frame passed at construction; used by :meth:`plot`.
    """

    def __init__(self, gdf, column):
        # Imported lazily so that only users of this classifier need the
        # color-generation package at import time.
        import distinctipy

        self.name = "UniqueValue"
        self.y = gdf[column]
        bins, counts = unique_value(self.y)
        self.yb = self.y
        self.counts = counts
        self.classes = bins
        self.colors = distinctipy.get_colors(len(counts))
        self.column = column
        self.gdf = gdf

    def _legend_entries(self, sort_by_counts=False, ascending=True):
        """
        Build the ordered legend entries.

        Parameters
        ----------
        sort_by_counts : bool, default False
            Order entries by class count instead of by class label.
        ascending : bool, default True
            Use ascending order; otherwise the ascending order is reversed.

        Returns
        -------
        list of tuple
            ``(class_index, text)`` pairs, where ``class_index`` indexes
            ``classes`` / ``counts`` / ``colors`` and ``text`` is
            ``"<label> (<count>)"`` with labels right-aligned to the
            longest label.
        """
        labels = [str(label) for label in self.classes]
        counts = np.asarray(self.counts)
        keys = counts if sort_by_counts else np.asarray(self.classes)
        # A stable sort keeps tied counts in label order, so the legend
        # is reproducible from call to call.
        order = np.argsort(keys, kind="stable")
        if not ascending:
            order = order[::-1]
        # Pad to the longest *label*; padding to the width of the largest
        # count does not align the label column.
        width = max((len(label) for label in labels), default=0)
        return [
            (int(i), f"{labels[i]:>{width}} ({int(counts[i])})") for i in order
        ]

    def plot(
        self,
        figsize=(16, 9),
        fontsize=17,
        legend=True,
        sort_by_counts=False,
        ascending=True,
        bbox_to_anchor=(1.7, 1),
        loc="upper right",
        axis_off=True,
        title=None,
        title_fontsize="medium",
    ):
        """
        Plot the unique value map with an optional legend.

        Parameters
        ----------
        figsize : tuple
            Size of figure, width and height in inches.
        fontsize : int
            Size of fonts for legend entries.
        legend : bool, default True
            Show legend.
        sort_by_counts : bool, default False
            Sort legend entries by counts (otherwise by label).
        ascending : bool, default True
            Use ascending sort.
        bbox_to_anchor : tuple
            Point at which the corner of the legend box named by ``loc``
            is anchored.
        loc : str
            matplotlib legend location.
        axis_off : bool, default True
            Omit axis.
        title : str
            Legend title.
        title_fontsize : {'xx-small', 'x-small', 'small', 'medium', 'large',
            'x-large', 'xx-large'}
            The font size of the legend's title.

        Returns
        -------
        f : matplotlib.figure.Figure
            Figure holding the map.
        ax : matplotlib.axes.Axes
            Axes the map (and legend) were drawn on.
        """
        # Plot-only dependency, imported where it is needed.
        import matplotlib.patches as mpatches

        palette = np.array(self.colors)
        # ``classes`` is sorted (it comes from np.unique), so searchsorted
        # gives the class index of every observation.
        ax = self.gdf.plot(
            color=palette[np.searchsorted(self.classes, self.yb)],
            figsize=figsize,
        )
        if legend:
            handles = [
                mpatches.Patch(color=self.colors[i], label=text)
                for i, text in self._legend_entries(sort_by_counts, ascending)
            ]
            # Draw on the axes returned by the plot call rather than on
            # pyplot's implicit "current axes".
            ax.legend(
                handles=handles,
                bbox_to_anchor=bbox_to_anchor,
                loc=loc,
                fontsize=fontsize,
                title=title,
                title_fontsize=title_fontsize,
            )
        if axis_off:
            ax.set_axis_off()
        return ax.get_figure(), ax
diff --git a/notebooks/07_unique.ipynb b/notebooks/07_unique.ipynb new file mode 100644 index 00000000..001bdf0a --- /dev/null +++ b/notebooks/07_unique.ipynb @@ -0,0 +1,239 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Unique Value Classification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-05T19:18:41.941529Z", + "start_time": "2022-11-05T19:18:40.603589Z" + } + }, + "outputs": [], + "source": [ + "import mapclassify\n", + "import numpy\n", + "import libpysal\n", + "import geopandas\n", + "import distinctipy\n", + "import matplotlib.patches as mpatches\n", + "import matplotlib.pyplot as plt\n", + "import pandas\n", + "\n", + "mapclassify.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "libpysal.examples.load_example(\"South\")\n", + "gdf = geopandas.read_file(libpysal.examples.get_path('south.shp'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gdf.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "uv = mapclassify.UniqueValue(gdf, 'STATE_NAME')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "uv.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "uv.plot(sort_by_counts=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "uv.plot(sort_by_counts=True, ascending=False, bbox_to_anchor=(1.3,1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "uv.plot(sort_by_counts=True, ascending=False, bbox_to_anchor=(1.3,1),\n", + " title='State')" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "url = \"https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_county_20m.zip\"\n", + "us = geopandas.read_file(url)\n", + "us = us[us.STATEFP!='02']\n", + "us = us[us.STATEFP!='15']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mapclassify.UniqueValue(us, 'STATEFP').plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mapclassify.UniqueValue(us, 'STATEFP').plot(sort_by_counts=True, ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mapclassify.UniqueValue(us, 'STATEFP').plot(sort_by_counts=True, ascending=False,\n", + " fontsize=8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mapclassify.UniqueValue(us, 'STATEFP').plot(sort_by_counts=True, ascending=False,\n", + " fontsize=8, bbox_to_anchor=(1.1, 1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mapclassify.UniqueValue(us, 'STATEFP').plot(sort_by_counts=True, ascending=False,\n", + " fontsize=8, bbox_to_anchor=(1.1, 1), loc='center right')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mapclassify.UniqueValue(us, 'STATEFP').plot(legend=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mapclassify.UniqueValue(us, 'STATEFP').plot(legend=True, axis_off=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mapclassify.UniqueValue(us, 'STATEFP').plot(legend=True, axis_off=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "mapclassify.UniqueValue(us, 'STATEFP').plot(legend=True, axis_off=False, \n", + " title='state fips')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mapclassify.UniqueValue(us, 'STATEFP').plot(legend=True, axis_off=False, \n", + " title='state fips',\n", + " title_fontsize='large')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mapclassify.UniqueValue(us, 'STATEFP').plot(legend=True, axis_off=False, \n", + " title='state fips',\n", + " title_fontsize='x-large')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/requirements.txt b/requirements.txt index 584658b9..036f592e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ numpy>=1.3 scikit-learn pandas>=1.0 networkx +distinctipy