Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unique value classifier for categorical maps with distinct colors for a large number of categories. #173

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ci/310-numba.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dependencies:
- scipy
- beautifulsoup4
- scikit-learn
- distinctipy
# testing
- pytest
- pytest-cov
Expand Down
1 change: 1 addition & 0 deletions ci/310.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dependencies:
- scipy
- beautifulsoup4
- scikit-learn
- distinctipy
# testing
- pytest
- pytest-cov
Expand Down
1 change: 1 addition & 0 deletions ci/311.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dependencies:
- scipy
- beautifulsoup4
- scikit-learn
- distinctipy
# testing
- pytest
- pytest-cov
Expand Down
1 change: 1 addition & 0 deletions ci/38-numba.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dependencies:
- scipy
- beautifulsoup4
- scikit-learn
- distinctipy
# testing
- pytest
- pytest-cov
Expand Down
1 change: 1 addition & 0 deletions ci/38.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ dependencies:
- zstd
- networkx
- libpysal
- distinctipy
1 change: 1 addition & 0 deletions ci/39-numba.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dependencies:
- scipy
- beautifulsoup4
- scikit-learn
- distinctipy
# testing
- pytest
- pytest-cov
Expand Down
1 change: 1 addition & 0 deletions ci/39.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ dependencies:
- zstd
- networkx
- libpysal
- distinctipy
1 change: 1 addition & 0 deletions mapclassify/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
Percentiles,
Quantiles,
StdMean,
UniqueValue,
UserDefined,
gadf,
load_example,
Expand Down
115 changes: 115 additions & 0 deletions mapclassify/classifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@
import functools
import warnings

import distinctipy
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
from sklearn.cluster import KMeans as KMEANS

Expand All @@ -28,6 +32,7 @@
"Quantiles",
"Percentiles",
"StdMean",
"UniqueValue",
"UserDefined",
"gadf",
"KClassifiers",
Expand All @@ -49,6 +54,7 @@
"Quantiles",
"Percentiles",
"StdMean",
"UniqueValue",
"UserDefined",
)

Expand Down Expand Up @@ -205,6 +211,12 @@ def head_tail_breaks(values, cuts):
return cuts


def unique_value(values):
    """
    Tabulate the distinct values of a categorical variable.

    Parameters
    ----------
    values : array-like
        Observations to tabulate.

    Returns
    -------
    labels : numpy.ndarray
        The distinct values found in ``values``, in the sorted order
        produced by ``numpy.unique``.
    counts : numpy.ndarray
        Number of occurrences of each entry of ``labels``.
    """
    # A single ``numpy.unique`` call yields both pieces; hand them back as a
    # plain 2-tuple ``(labels, counts)``.
    return tuple(np.unique(values, return_counts=True))


def quantile(y, k=4):
"""
Calculates the quantiles for an array.
Expand Down Expand Up @@ -1080,6 +1092,109 @@ def plot(
return f, ax


class UniqueValue:
    """
    Unique value classifier.

    A unique value classifier is appropriate for a categorical
    variable that has a discrete set of unique values. For
    example, a map of US counties where the variable State takes
    on the same value for counties belonging to the same state.

    This classifier creates k unique colors associated with the k
    unique values of the column specified.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Data frame holding the geometries and the column to classify.
    column : str
        Column of data frame with unique values to classify.

    Attributes
    ----------
    name : str
        Always ``"UniqueValue"``.
    y : pandas.Series
        The observations, ``gdf[column]``.
    yb : pandas.Series
        Class membership of each observation. For this classifier it is
        the observed value itself (the same object as ``y``).
    classes : numpy.ndarray
        The k unique values of ``y`` as returned by ``unique_value``.
    counts : numpy.ndarray
        Number of observations in each class, aligned with ``classes``.
    colors : list
        One color per class, aligned with ``classes``, as returned by
        ``distinctipy.get_colors``.
    column : str
        Name of the classified column.
    gdf : geopandas.GeoDataFrame
        The data frame passed at construction; used by ``plot``.
    """

    def __init__(self, gdf, column):
        self.name = "UniqueValue"
        self.y = gdf[column]
        bins, counts = unique_value(self.y)
        self.yb = self.y
        self.counts = counts
        self.classes = bins
        self.colors = distinctipy.get_colors(len(counts))
        self.column = column
        self.gdf = gdf

    def plot(
        self,
        figsize=(16, 9),
        fontsize=17,
        legend=True,
        sort_by_counts=False,
        ascending=True,
        bbox_to_anchor=(1.7, 1),
        loc="upper right",
        axis_off=True,
        title=None,
        title_fontsize="medium",
    ):
        """
        Plot the unique value map with one color per class.

        Parameters
        ----------
        figsize : tuple
            Size of figure, width and height in inches.
        fontsize : int
            Size of fonts for legend entries.
        legend : bool, default True
            Show legend.
        sort_by_counts : bool, default False
            Sort legend entries by counts instead of by label.
        ascending : bool, default True
            Use ascending sort.
        bbox_to_anchor : tuple
            Passed to matplotlib's ``legend`` as ``bbox_to_anchor``.
        loc : str
            matplotlib legend location.
        axis_off : bool, default True
            Omit axis.
        title : str
            Legend title.
        title_fontsize : {'xx-small', 'x-small', 'small', 'medium', 'large',
            'x-large', 'xx-large'}
            The font size of the legend's title.

        Returns
        -------
        f : matplotlib.figure.Figure
            Figure holding the map.
        ax : matplotlib.axes.Axes
            Axes the map (and legend, if requested) were drawn on.
        """
        palette = np.array(self.colors)
        # ``classes`` comes from ``numpy.unique`` and is therefore sorted, so
        # ``searchsorted`` yields, for every observation, the index of its
        # class and hence of its color.
        feature_colors = palette[np.searchsorted(self.classes, self.yb)]

        # Keep the Axes geopandas drew on and work with it explicitly rather
        # than going through pyplot's implicit "current axes".
        ax = self.gdf.plot(color=feature_colors, figsize=figsize)

        if legend:
            table = pd.DataFrame(
                data={
                    "label": self.classes,
                    "count": self.counts,
                    "color": self.colors,
                }
            )
            sort_col = "count" if sort_by_counts else "label"
            table = table.sort_values(by=sort_col, ascending=ascending)

            # Labels are right-aligned in a field as wide as the largest
            # count, matching the established legend text format.
            w = len(str(table["count"].max()))
            patch_list = [
                mpatches.Patch(color=color, label=f"{label:>{w}} ({count})")
                for label, count, color in zip(
                    table["label"], table["count"], table["color"]
                )
            ]

            ax.legend(
                handles=patch_list,
                bbox_to_anchor=bbox_to_anchor,
                loc=loc,
                fontsize=fontsize,
                title=title,
                title_fontsize=title_fontsize,
            )

        if axis_off:
            ax.set_axis_off()

        return ax.get_figure(), ax


class HeadTailBreaks(MapClassifier):
"""
Head/tail Breaks Map Classification for Heavy-tailed Distributions.
Expand Down
Loading