From b6f30e6165e2d1d88cb939db865c3473311d6273 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Wed, 25 Sep 2024 13:50:01 +0800 Subject: [PATCH] [Flyte Decks] support ydata-profiling in python 3.12 (#2766) * [Flyte Decks] support ydata-profiling in python 3.12 Signed-off-by: Future-Outlier * remove exclude deck standard python3.12 ci Signed-off-by: Future-Outlier * make plugin soft dependencies Signed-off-by: Future-Outlier * add dev-requirements.in Signed-off-by: Future-Outlier * nit Signed-off-by: Future-Outlier * better README with dependencies Signed-off-by: Future-Outlier * add other dependency in dev-requirements.in, this will help setup-global-uv Signed-off-by: Future-Outlier * Trigger CI Signed-off-by: Future-Outlier * Trigger CI Signed-off-by: Future-Outlier * Update dependency Signed-off-by: Future-Outlier * new dockerfile dev Signed-off-by: Future-Outlier * new dockerfile Signed-off-by: Future-Outlier * new dockerfile Signed-off-by: Future-Outlier * revert back Signed-off-by: Future-Outlier * new dev image Signed-off-by: Future-Outlier --------- Signed-off-by: Future-Outlier --- .github/workflows/pythonbuild.yml | 3 - Dockerfile.dev | 6 + dev-requirements.in | 4 + dev-requirements.txt | 124 ++++++++++++++++-- plugins/flytekit-deck-standard/README.md | 43 +++++- .../dev-requirements.in | 5 + .../flytekitplugins/deck/__init__.py | 14 +- .../flytekitplugins/deck/renderer.py | 14 +- plugins/flytekit-deck-standard/setup.py | 9 +- 9 files changed, 191 insertions(+), 31 deletions(-) create mode 100644 plugins/flytekit-deck-standard/dev-requirements.in diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index 41991b960f..5fd44b1c0e 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -396,9 +396,6 @@ jobs: # apache-beam, one of flytekit-airflow dependencies, does not support python 3.12: https://github.com/apache/beam/issues/29149 - python-version: 3.12 plugin-names: "flytekit-airflow" - # 
ydata-profiling, a dependency of flytekit-deck-standard, does not support python 3.12: https://github.com/ydataai/ydata-profiling/issues/1510 - - python-version: 3.12 - plugin-names: "flytekit-deck-standard" # Tensorflow is a dependency of flytekit-mlflow tests and that is not supported yet: https://github.com/tensorflow/tensorflow/issues/62003 - python-version: 3.12 plugin-names: "flytekit-mlflow" diff --git a/Dockerfile.dev b/Dockerfile.dev index c872d0dab4..1dd155729a 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -40,7 +40,13 @@ RUN SETUPTOOLS_SCM_PRETEND_VERSION_FOR_FLYTEKIT=$PSEUDO_VERSION \ -e /flytekit \ -e /flytekit/plugins/flytekit-deck-standard \ -e /flytekit/plugins/flytekit-flyteinteractive \ + markdown \ + pandas \ + pillow \ + plotly \ + pygments \ scikit-learn \ + ydata-profiling \ && apt-get clean autoclean \ && apt-get autoremove --yes \ && rm -rf /var/lib/{apt,dpkg,cache,log}/ \ diff --git a/dev-requirements.in b/dev-requirements.in index ce4171018b..d6d7a54bcb 100644 --- a/dev-requirements.in +++ b/dev-requirements.in @@ -48,11 +48,15 @@ types-decorator types-mock autoflake +markdown pillow numpy pandas +plotly pyarrow +pygments scikit-learn +ydata-profiling types-requests prometheus-client diff --git a/dev-requirements.txt b/dev-requirements.txt index 0e69893ea6..5fd363804e 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -20,6 +20,8 @@ aioitertools==0.11.0 # via aiobotocore aiosignal==1.3.1 # via aiohttp +annotated-types==0.7.0 + # via pydantic asn1crypto==1.5.1 # via snowflake-connector-python asttokens==2.4.1 @@ -29,6 +31,7 @@ attrs==23.2.0 # aiohttp # hypothesis # jsonlines + # visions autoflake==2.3.1 # via -r dev-requirements.in azure-core==1.30.1 @@ -70,6 +73,8 @@ cloudpickle==3.0.0 # via flytekit codespell==2.3.0 # via -r dev-requirements.in +contourpy==1.3.0 + # via matplotlib coverage[toml]==7.5.3 # via # -r dev-requirements.in @@ -83,8 +88,11 @@ cryptography==43.0.1 # msal # pyjwt # pyopenssl - # secretstorage # 
snowflake-connector-python +cycler==0.12.1 + # via matplotlib +dacite==1.8.1 + # via ydata-profiling dataclasses-json==0.5.9 # via flytekit decorator==5.1.1 @@ -111,6 +119,8 @@ flyteidl @ git+https://github.com/flyteorg/flyte.git@master#subdirectory=flyteid # via # -r dev-requirements.in # flytekit +fonttools==4.54.1 + # via matplotlib frozenlist==1.4.1 # via # aiohttp @@ -175,6 +185,8 @@ grpcio-status==1.62.2 # via # flytekit # google-api-core +htmlmin==0.1.12 + # via ydata-profiling hypothesis==6.103.0 # via -r dev-requirements.in icdiff==2.0.7 @@ -186,6 +198,10 @@ idna==3.7 # requests # snowflake-connector-python # yarl +imagehash==4.3.1 + # via + # visions + # ydata-profiling importlib-metadata==7.1.0 # via flytekit iniconfig==2.0.0 @@ -206,16 +222,15 @@ jaraco-functools==4.0.1 # via keyring jedi==0.19.1 # via ipython -jeepney==0.8.0 - # via - # keyring - # secretstorage +jinja2==3.1.4 + # via ydata-profiling jmespath==1.0.1 # via botocore joblib==1.4.2 # via # -r dev-requirements.in # flytekit + # phik # scikit-learn jsonlines==4.0.0 # via flytekit @@ -225,12 +240,20 @@ keyring==25.2.1 # via flytekit keyrings-alt==5.0.1 # via -r dev-requirements.in +kiwisolver==1.4.7 + # via matplotlib kubernetes==29.0.0 # via -r dev-requirements.in +llvmlite==0.43.0 + # via numba +markdown==3.7 + # via -r dev-requirements.in markdown-it-py==3.0.0 # via # flytekit # rich +markupsafe==2.1.5 + # via jinja2 marshmallow==3.21.2 # via # dataclasses-json @@ -244,6 +267,12 @@ marshmallow-jsonschema==0.13.0 # via flytekit mashumaro==3.13 # via flytekit +matplotlib==3.9.2 + # via + # phik + # seaborn + # wordcloud + # ydata-profiling matplotlib-inline==0.1.7 # via ipython mdurl==0.1.2 @@ -265,21 +294,41 @@ multidict==6.0.5 # via # aiohttp # yarl +multimethod==1.12 + # via + # visions + # ydata-profiling mypy==1.6.1 # via -r dev-requirements.in mypy-extensions==1.0.0 # via # mypy # typing-inspect +networkx==3.3 + # via visions nodeenv==1.9.0 # via pre-commit +numba==0.60.0 + # via 
ydata-profiling numpy==1.26.4 # via # -r dev-requirements.in + # contourpy + # imagehash + # matplotlib + # numba # pandas + # patsy + # phik # pyarrow + # pywavelets # scikit-learn # scipy + # seaborn + # statsmodels + # visions + # wordcloud + # ydata-profiling oauthlib==3.2.2 # via # kubernetes @@ -291,22 +340,42 @@ packaging==24.0 # docker # google-cloud-bigquery # marshmallow + # matplotlib # msal-extensions + # plotly # pytest # setuptools-scm # snowflake-connector-python + # statsmodels pandas==2.2.2 - # via -r dev-requirements.in + # via + # -r dev-requirements.in + # phik + # seaborn + # statsmodels + # visions + # ydata-profiling parso==0.8.4 # via jedi +patsy==0.5.6 + # via statsmodels pexpect==4.9.0 # via ipython +phik==0.12.4 + # via ydata-profiling pillow==10.3.0 - # via -r dev-requirements.in + # via + # -r dev-requirements.in + # imagehash + # matplotlib + # visions + # wordcloud platformdirs==4.2.2 # via # snowflake-connector-python # virtualenv +plotly==5.24.1 + # via -r dev-requirements.in pluggy==1.5.0 # via pytest portalocker==2.8.2 @@ -350,10 +419,15 @@ pyasn1-modules==0.4.0 # via google-auth pycparser==2.22 # via cffi +pydantic==2.9.2 + # via ydata-profiling +pydantic-core==2.23.4 + # via pydantic pyflakes==3.2.0 # via autoflake pygments==2.18.0 # via + # -r dev-requirements.in # flytekit # ipython # rich @@ -363,6 +437,8 @@ pyjwt[crypto]==2.8.0 # snowflake-connector-python pyopenssl==24.2.1 # via snowflake-connector-python +pyparsing==3.1.4 + # via matplotlib pytest==8.2.1 # via # -r dev-requirements.in @@ -390,6 +466,7 @@ python-dateutil==2.9.0.post0 # croniter # google-cloud-bigquery # kubernetes + # matplotlib # pandas python-json-logger==2.0.7 # via flytekit @@ -402,11 +479,14 @@ pytz==2024.1 # croniter # pandas # snowflake-connector-python +pywavelets==1.7.0 + # via imagehash pyyaml==6.0.1 # via # flytekit # kubernetes # pre-commit + # ydata-profiling requests==2.32.3 # via # azure-core @@ -421,6 +501,7 @@ requests==2.32.3 # msal # 
requests-oauthlib # snowflake-connector-python + # ydata-profiling requests-oauthlib==2.0.0 # via # google-auth-oauthlib @@ -438,9 +519,14 @@ s3fs==2024.5.0 scikit-learn==1.5.0 # via -r dev-requirements.in scipy==1.13.1 - # via scikit-learn -secretstorage==3.3.3 - # via keyring + # via + # imagehash + # phik + # scikit-learn + # statsmodels + # ydata-profiling +seaborn==0.13.2 + # via ydata-profiling setuptools-scm==8.1.0 # via -r dev-requirements.in six==1.16.0 @@ -449,6 +535,7 @@ six==1.16.0 # azure-core # isodate # kubernetes + # patsy # python-dateutil snowflake-connector-python==3.12.1 # via -r dev-requirements.in @@ -460,14 +547,22 @@ stack-data==0.6.3 # via ipython statsd==3.3.0 # via flytekit +statsmodels==0.14.3 + # via ydata-profiling +tenacity==9.0.0 + # via plotly threadpoolctl==3.5.0 # via scikit-learn tomlkit==0.13.2 # via snowflake-connector-python +tqdm==4.66.5 + # via ydata-profiling traitlets==5.14.3 # via # ipython # matplotlib-inline +typeguard==4.3.0 + # via ydata-profiling types-croniter==2.0.0.20240423 # via -r dev-requirements.in types-decorator==5.1.8.20240310 @@ -485,8 +580,11 @@ typing-extensions==4.12.0 # flytekit # mashumaro # mypy + # pydantic + # pydantic-core # rich-click # snowflake-connector-python + # typeguard # typing-inspect typing-inspect==0.9.0 # via dataclasses-json @@ -502,16 +600,22 @@ urllib3==2.2.1 # types-requests virtualenv==20.26.2 # via pre-commit +visions[type-image-path]==0.7.6 + # via ydata-profiling wcwidth==0.2.13 # via prompt-toolkit websocket-client==1.8.0 # via # docker # kubernetes +wordcloud==1.9.3 + # via ydata-profiling wrapt==1.16.0 # via aiobotocore yarl==1.9.4 # via aiohttp +ydata-profiling==4.10.0 + # via -r dev-requirements.in zipp==3.19.1 # via importlib-metadata diff --git a/plugins/flytekit-deck-standard/README.md b/plugins/flytekit-deck-standard/README.md index 719a2e77a8..11ef6fb853 100644 --- a/plugins/flytekit-deck-standard/README.md +++ b/plugins/flytekit-deck-standard/README.md @@ -1,9 +1,50 
@@ # Flytekit Deck Plugin -This Plugin provides more renderers to improve task visibility. +This plugin provides additional renderers to improve task visibility within Flytekit. + +## Installation To install the plugin, run the following command: ```bash pip install flytekitplugins-deck-standard ``` + +## Renderer Requirements + +Each renderer may require additional modules. + +The table below outlines the dependencies for each renderer: + +| Renderer | Required Module(s) | +|------------------------|-----------------------------| +| SourceCodeRenderer | `pygments` | +| FrameProfilingRenderer | `pandas`, `ydata-profiling` | +| MarkdownRenderer | `markdown` | +| BoxRenderer | `pandas`, `plotly` | +| ImageRenderer | `pillow` | +| TableRenderer | `pandas` | +| GanttChartRenderer | `pandas`, `plotly` | + +## Renderer Descriptions + +### SourceCodeRenderer +Converts Python source code to HTML using the Pygments library. + +### FrameProfilingRenderer +Generates a profiling report based on a pandas DataFrame using `ydata_profiling`. + +### MarkdownRenderer +Converts markdown strings to HTML. + +### BoxRenderer +Creates a box-and-whisker plot from a column in a pandas DataFrame. + +### ImageRenderer +Displays images from a `FlyteFile` or `PIL.Image.Image` object in HTML. + +### TableRenderer +Renders a pandas DataFrame as an HTML table with customizable headers and table width. + +### GanttChartRenderer +Displays a Gantt chart using a pandas DataFrame with "Start", "Finish", and "Name" columns. 
diff --git a/plugins/flytekit-deck-standard/dev-requirements.in b/plugins/flytekit-deck-standard/dev-requirements.in new file mode 100644 index 0000000000..970e7776f0 --- /dev/null +++ b/plugins/flytekit-deck-standard/dev-requirements.in @@ -0,0 +1,5 @@ +markdown +pandas +plotly +pygments +ydata-profiling diff --git a/plugins/flytekit-deck-standard/flytekitplugins/deck/__init__.py b/plugins/flytekit-deck-standard/flytekitplugins/deck/__init__.py index 279adb08dd..60dbd1591d 100644 --- a/plugins/flytekit-deck-standard/flytekitplugins/deck/__init__.py +++ b/plugins/flytekit-deck-standard/flytekitplugins/deck/__init__.py @@ -9,9 +9,19 @@ BoxRenderer FrameProfilingRenderer - MarkdownRenderer + GanttChartRenderer ImageRenderer + MarkdownRenderer + SourceCodeRenderer TableRenderer """ -from .renderer import BoxRenderer, FrameProfilingRenderer, ImageRenderer, MarkdownRenderer, TableRenderer +from .renderer import ( + BoxRenderer, + FrameProfilingRenderer, + GanttChartRenderer, + ImageRenderer, + MarkdownRenderer, + SourceCodeRenderer, + TableRenderer, +) diff --git a/plugins/flytekit-deck-standard/flytekitplugins/deck/renderer.py b/plugins/flytekit-deck-standard/flytekitplugins/deck/renderer.py index fbf05f0efe..1aca9595ce 100644 --- a/plugins/flytekit-deck-standard/flytekitplugins/deck/renderer.py +++ b/plugins/flytekit-deck-standard/flytekitplugins/deck/renderer.py @@ -9,11 +9,15 @@ import pandas as pd import PIL.Image import plotly.express as px + import pygments + import ydata_profiling else: pd = lazy_module("pandas") markdown = lazy_module("markdown") px = lazy_module("plotly.express") PIL = lazy_module("PIL") + ydata_profiling = lazy_module("ydata_profiling") + pygments = lazy_module("pygments") class SourceCodeRenderer: @@ -40,13 +44,9 @@ def to_html(self, source_code: str) -> str: Returns: str: The resulting HTML as a string, including CSS and highlighted source code. 
""" - from pygments import highlight - from pygments.formatters.html import HtmlFormatter - from pygments.lexers.python import PythonLexer - - formatter = HtmlFormatter(style="colorful") + formatter = pygments.formatters.html.HtmlFormatter(style="colorful") css = formatter.get_style_defs(".highlight").replace("#fff0f0", "#ffffff") - html = highlight(source_code, PythonLexer(), formatter) + html = pygments.highlight(source_code, pygments.lexers.python.PythonLexer(), formatter) return f"{html}" @@ -60,8 +60,6 @@ def __init__(self, title: str = "Pandas Profiling Report"): def to_html(self, df: "pd.DataFrame") -> str: assert isinstance(df, pd.DataFrame) - import ydata_profiling - profile = ydata_profiling.ProfileReport(df, title=self._title) return profile.to_html() diff --git a/plugins/flytekit-deck-standard/setup.py b/plugins/flytekit-deck-standard/setup.py index b0d2c4783d..c707084161 100644 --- a/plugins/flytekit-deck-standard/setup.py +++ b/plugins/flytekit-deck-standard/setup.py @@ -6,13 +6,6 @@ plugin_requires = [ "flytekit", - "markdown", - "plotly", - # ydata-profiling is not compatible with python 3.12 yet: https://github.com/ydataai/ydata-profiling/issues/1510 - "ydata-profiling; python_version<'3.12'", - "pandas", - "ipywidgets", - "pygments", ] __version__ = "0.0.0+develop" @@ -38,6 +31,8 @@ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Topic :: Scientific/Engineering", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Software Development",