Skip to content

Commit

Permalink
Merge branch 'master' into SXLLCAXAAD-9
Browse files — browse the repository at this point in the history
  • Loading branch information
alexmorev authored Nov 26, 2024
2 parents f554aa4 + 569112e commit 756465e
Show file tree
Hide file tree
Showing 20 changed files with 175 additions and 172 deletions.
12 changes: 4 additions & 8 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@ jobs:
test:
runs-on: ubuntu-latest
container:
image: ckan/ckan-dev:2.10.3
image: ckan/ckan-dev:2.10-py3.10
options: --user root
services:
solr:
image: ckan/ckan-solr:2.10
postgres:
image: ckan/ckan-postgres-dev:2.9
image: ckan/ckan-postgres-dev:2.10
env:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
Expand All @@ -26,16 +27,11 @@ jobs:

steps:
- uses: actions/checkout@v4
- name: Install Pyarrow dependencies
run: apt-get install -y build-essential ninja-build cmake
- name: Install requirements
# Install any extra requirements your extension has here (dev requirements, other extensions etc)
run: |
pip install --upgrade pip
pip install cmake
pip install -r requirements.txt
pip install -r dev-requirements.txt
pip install -e .
pip install -e '.[dev, pyarrow]'
- name: Setup extension
# Extra initialization steps
run: |
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[![Tests](https://github.com/DataShades/ckanext-charts/workflows/Tests/badge.svg?branch=main)](https://github.com/DataShades/ckanext-charts/actions)
[![Tests](https://github.com/DataShades/ckanext-charts/actions/workflows/test.yml/badge.svg)](https://github.com/DataShades/ckanext-charts/actions/workflows/test.yml)

# ckanext-charts

Expand Down
15 changes: 7 additions & 8 deletions ckanext/charts/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@
import hashlib
import logging
import os
from re import T
import tempfile
import time
from abc import ABC, abstractmethod
from io import BytesIO
from typing import IO as File
from typing import IO

import pandas as pd
from redis.exceptions import ResponseError
Expand All @@ -18,7 +17,6 @@

from ckanext.charts import config, const, exception


log = logging.getLogger(__name__)


Expand Down Expand Up @@ -62,8 +60,8 @@ def set_data(self, key: str, data: pd.DataFrame):
self.client.setex(key, cache_ttl, data.to_csv(index=False))
else:
self.client.set(key, value=data.to_csv(index=False))
except Exception as e:
log.exception("Failed to save data to Redis: %s", e)
except Exception:
log.exception("Failed to save data to Redis")

def invalidate(self, key: str):
self.client.delete(key)
Expand Down Expand Up @@ -92,7 +90,7 @@ def get_data(self, key: str) -> pd.DataFrame | None:
return None

@abstractmethod
def read_data(self, file: File) -> pd.DataFrame | None:
def read_data(self, file: IO) -> pd.DataFrame | None:
pass

def set_data(self, key: str, data: pd.DataFrame) -> None:
Expand Down Expand Up @@ -140,7 +138,7 @@ class FileCacheORC(FileCache):

FILE_FORMAT = "orc"

def read_data(self, file: File) -> pd.DataFrame | None:
def read_data(self, file: IO) -> pd.DataFrame | None:
from pyarrow import orc

return orc.ORCFile(file).read().to_pandas()
Expand All @@ -151,12 +149,13 @@ def write_data(self, file_path: str, data: pd.DataFrame) -> None:

data.to_orc(file_path)


class FileCacheCSV(FileCache):
"""Cache data as CSV file"""

FILE_FORMAT = "csv"

def read_data(self, file: File) -> pd.DataFrame | None:
def read_data(self, file: IO) -> pd.DataFrame | None:
return pd.read_csv(file)

def write_data(self, file_path: str, data: pd.DataFrame) -> None:
Expand Down
5 changes: 2 additions & 3 deletions ckanext/charts/chart_builders/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from __future__ import annotations

from .base import BaseChartBuilder
from .plotly import PlotlyBuilder, PlotlyBarForm
from .observable import ObservableBuilder
from .chartjs import ChartJSBarBuilder

from .observable import ObservableBuilder
from .plotly import PlotlyBarForm, PlotlyBuilder

DEFAULT_CHART_FORM = PlotlyBarForm

Expand Down
35 changes: 20 additions & 15 deletions ckanext/charts/chart_builders/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,22 @@
from abc import ABC, abstractmethod
from typing import Any, cast

import pandas as pd
import numpy as np
import pandas as pd

import ckan.types as types
import ckan.plugins.toolkit as tk
from ckan import types

import ckanext.charts.const as const
from ckanext.charts import const, fetchers
from ckanext.charts.exception import ChartTypeNotImplementedError
from ckanext.charts import fetchers


class FilterDecoder:
def __init__(
self, filter_input: str, pair_divider: str = "|", key_value_divider: str = ":"
self,
filter_input: str,
pair_divider: str = "|",
key_value_divider: str = ":",
):
self.filter_input = filter_input
self.pair_divider = pair_divider
Expand Down Expand Up @@ -62,11 +64,13 @@ def __init__(
# TODO: requires more work here...
# I'm not sure about other types, that column can have
if column_type == np.int64:
values = [int(value) for value in values]
converted_values = [int(value) for value in values]
elif column_type == np.float64:
values = [float(value) for value in values]
converted_values = [float(value) for value in values]
else:
converted_values = values

filtered_df = filtered_df[filtered_df[column].isin(values)]
filtered_df = filtered_df[filtered_df[column].isin(converted_values)]

self.df = filtered_df

Expand Down Expand Up @@ -118,7 +122,6 @@ def to_json(self) -> str:
"""This method should return the chart data as a dumped JSON data. It
will be passed to a JS script, that will render a chart based on this
data."""
pass

def drop_empty_values(self, data: dict[str, Any]) -> dict[str, Any]:
"""Remove empty values from the dictionary"""
Expand Down Expand Up @@ -408,7 +411,7 @@ def y_axis_field(self, choices: list[dict[str, str]]) -> dict[str, Any]:
self.get_validator("charts_if_empty_same_as")("names"),
self.get_validator("unicode_safe"),
],
}
},
)

return field
Expand Down Expand Up @@ -446,7 +449,7 @@ def y_multi_axis_field(

if max_items:
field["validators"].append(
self.get_validator("charts_list_length_validator")(max_items)
self.get_validator("charts_list_length_validator")(max_items),
)
field["form_attrs"]["maxItems"] = max_items

Expand Down Expand Up @@ -476,7 +479,7 @@ def values_multi_field(
self.get_validator("charts_to_list_if_string"),
],
"help_text": help_text,
}
},
)

return field
Expand All @@ -491,8 +494,10 @@ def split_data_field(self) -> dict[str, Any]:
self.get_validator("default")(False),
self.get_validator("boolean_validator"),
],
"help_text": """Split data into different columns by years based
on datetime column stated for the x-axis"""
"help_text": (
"Split data into different columns by years based on datetime "
"column stated for the x-axis"
),
}

def skip_null_values_field(self) -> dict[str, Any]:
Expand All @@ -504,7 +509,7 @@ def skip_null_values_field(self) -> dict[str, Any]:
"validators": [
self.get_validator("boolean_validator"),
],
"help_text": """Entries of the data with missing values will not be
"help_text": """Entries of the data with missing values will not be
graphed or will be skipped""",
}

Expand Down
54 changes: 28 additions & 26 deletions ckanext/charts/chart_builders/chartjs.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def create_zoom_and_title_options(self, options: str[dict, Any]) -> dict[str, An
"display": True,
"position": "bottom",
},
}
},
)
return options

Expand All @@ -69,7 +69,7 @@ def _prepare_data(self) -> dict[str, Any]:
"elements": {"bar": {"borderWidth": 1}},
"plugins": {"legend": {"position": "top"}},
"scales": {"y": {"beginAtZero": True}},
}
},
)

datasets = []
Expand All @@ -80,18 +80,18 @@ def _prepare_data(self) -> dict[str, Any]:
for label in data["data"]["labels"]:
try:
aggregate_value = int(
self.df[self.df[self.settings["x"]] == label][field].sum()
self.df[self.df[self.settings["x"]] == label][field].sum(),
)
except ValueError:
raise ChartBuildError(f"Column '{field}' is not numeric")
except ValueError as e:
raise ChartBuildError(f"Column '{field}' is not numeric") from e

dataset_data.append(aggregate_value)

datasets.append(
{
"label": field,
"data": dataset_data,
}
},
)

data["data"]["datasets"] = datasets
Expand Down Expand Up @@ -216,15 +216,15 @@ def to_json(self) -> str:
for label in data["data"]["labels"]:
dataset_data.append(
self.convert_to_native_types(
self.df[self.df[self.settings["names"]] == label][field].sum()
)
self.df[self.df[self.settings["names"]] == label][field].sum(),
),
)

data["data"]["datasets"] = [
{
"label": field,
"data": dataset_data,
}
},
]

return json.dumps(data)
Expand Down Expand Up @@ -280,17 +280,17 @@ def to_json(self) -> str:
dataset_data.append(
{
"x": self.convert_to_native_types(
data_series[self.settings["x"]]
data_series[self.settings["x"]],
),
"y": self.convert_to_native_types(data_series[field]),
}
},
)

data["data"]["datasets"] = [
{
"label": self.settings["y"],
"data": dataset_data,
}
},
]
data["options"] = self.create_zoom_and_title_options(data["options"])
return json.dumps(self._configure_date_axis(data))
Expand All @@ -311,7 +311,7 @@ def _configure_date_axis(self, data: dict[str, Any]) -> dict[str, Any]:
"unit": "day",
"displayFormats": {"day": "YYYY-MM-DD"},
},
}
},
)
scales["x"] = x_scale

Expand Down Expand Up @@ -360,18 +360,18 @@ def to_json(self) -> str:
}

dataset_data = []
max_size = self.df[self.settings["size"]].max()
size_max = self.df[self.settings["size"]].max()

for _, data_series in self.df.iterrows():
for field in [self.settings["y"]]:
dataset_data.append(
{
"x": self.convert_to_native_types(
data_series[self.settings["x"]]
data_series[self.settings["x"]],
),
"y": self.convert_to_native_types(data_series[field]),
"r": self._calculate_bubble_radius(data_series, max_size),
}
"r": self._calculate_bubble_radius(data_series, size_max),
},
)

data["data"]["datasets"] = [
Expand All @@ -381,22 +381,22 @@ def to_json(self) -> str:

return json.dumps(self._configure_date_axis(data))

def _calculate_bubble_radius(self, data_series: pd.Series, max_size: int) -> int:
def _calculate_bubble_radius(self, data_series: pd.Series, size_max: int) -> int:
"""Calculate bubble radius based on the size column"""
size_column: str = self.settings["size"]

# Handle cases where max_size is zero or NaN values are present
# Handle cases where size_max is zero or NaN values are present
# or the column is not numeric
try:
pd.to_numeric(max_size)
except ValueError:
raise ChartBuildError(f"Column '{size_column}' is not numeric")
pd.to_numeric(size_max)
except ValueError as e:
raise ChartBuildError(f"Column '{size_column}' is not numeric") from e

if max_size == 0 or np.isnan(max_size):
if size_max == 0 or np.isnan(size_max):
bubble_radius = self.min_bubble_radius
else:
data_series_size = np.nan_to_num(data_series[size_column], nan=0)
bubble_radius = (data_series_size / max_size) * 30
bubble_radius = (data_series_size / size_max) * 30

if bubble_radius < self.min_bubble_radius:
bubble_radius = self.min_bubble_radius
Expand Down Expand Up @@ -434,7 +434,7 @@ def to_json(self) -> str:
for value in self.settings["values"]:
try:
dataset_data.append(
self.df[self.df[self.settings["names"]] == label][value].item()
self.df[self.df[self.settings["names"]] == label][value].item(),
)
except ValueError:
# TODO: probably collision by name column, e.g two or more rows
Expand Down Expand Up @@ -468,7 +468,9 @@ def get_form_fields(self):
self.names_field(columns),
self.values_multi_field(
columns,
help_text="Select 3 or more different categorical variables (dimensions)",
help_text=(
"Select 3 or more different categorical variables (dimensions)"
),
),
self.more_info_button_field(),
self.limit_field(),
Expand Down
Loading

0 comments on commit 756465e

Please sign in to comment.