Skip to content

Commit

Permalink
moved mlflow-flavor
Browse files Browse the repository at this point in the history
  • Loading branch information
mn-mikke committed Nov 10, 2023
1 parent bf5e77a commit d897548
Show file tree
Hide file tree
Showing 13 changed files with 58 additions and 280 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ ext {
pythonProjects = [
project(':h2o-py'),
project(':h2o-py-cloud-extensions'),
project(':h2o-py-mlflow-flavors')
project(':h2o-py-mlflow-flavor')
]

// The project which need to be run under CI only
Expand Down
File renamed without changes.
4 changes: 4 additions & 0 deletions h2o-py-mlflow-flavor/DESCRIPTION.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
H2O-3 MLFlow Flavor
===================

A tiny library containing an MLFlow flavor for working with H2O-3 MOJO and POJO models.
3 changes: 3 additions & 0 deletions h2o-py-mlflow-flavor/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# H2O-3 MLFlow Flavor

A tiny library containing an [MLFlow](https://mlflow.org/) flavor for working with H2O-3 MOJO and POJO models.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
description = "H2O-3 MLFlow Flavors"
description = "H2O-3 MLFlow Flavor"

dependencies {}

Expand All @@ -20,11 +20,11 @@ ext {
//
task createVersionFiles() {
doLast {
file("${buildDir}/h2o_mlflow_flavors/").mkdirs()
File version_file = new File("${buildDir}/h2o_mlflow_flavors/", "version.txt")
file("${buildDir}/h2o_mlflow_flavor/").mkdirs()
File version_file = new File("${buildDir}/h2o_mlflow_flavor/", "version.txt")
version_file.write(PROJECT_VERSION)

File build_file = new File("${buildDir}/h2o_mlflow_flavors/", "buildinfo.txt")
File build_file = new File("${buildDir}/h2o_mlflow_flavor/", "buildinfo.txt")
build_file.write(buildVersion.toString())
}
}
Expand All @@ -34,7 +34,7 @@ task copySrcFiles(type: Copy) {
from ("${projectDir}") {
include "setup.py"
include "setup.cfg"
include "h2o_mlflow_flavors/**"
include "h2o_mlflow_flavor/**"
include "README.md"
include "DESCRIPTION.rst"
}
Expand All @@ -45,7 +45,7 @@ task buildDist(type: Exec, dependsOn: [createVersionFiles, copySrcFiles]) {
workingDir buildDir
doFirst {
file("${buildDir}/tmp").mkdirs()
standardOutput = new FileOutputStream(file("${buildDir}/tmp/h2o_mlflow_flavors_buildDist.out"))
standardOutput = new FileOutputStream(file("${buildDir}/tmp/h2o_mlflow_flavor_buildDist.out"))
}
commandLine getOsSpecificCommandLine([pythonexe, "setup.py", "bdist_wheel"])
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
The `h2o_mlflow_flavors.h2o_gen_model` module provides an API for working with H2O MOJO and POJO models.
The `h2o_mlflow_flavor` module provides an API for working with H2O MOJO and POJO models.
"""

import logging
Expand All @@ -16,8 +16,8 @@
from mlflow.models import Model
from mlflow.models.model import MLMODEL_FILE_NAME
from mlflow.models.utils import _save_example
from mlflow.models.signature import ModelSignature
from mlflow.models.utils import ModelInputExample
from mlflow.models import ModelSignature, ModelInputExample
from mlflow.types.schema import ColSpec, ParamSchema, ParamSpec, Schema, DataType
from mlflow.utils.environment import (
_CONDA_ENV_FILE_NAME,
_CONSTRAINTS_FILE_NAME,
Expand All @@ -31,14 +31,12 @@
)
from mlflow.utils.file_utils import write_to
from mlflow.utils.model_utils import (
_add_code_from_conf_to_system_path,
_get_flavor_configuration,
_validate_and_copy_code_paths,
_validate_and_prepare_target_save_path,
)
from mlflow.utils.requirements_utils import _get_pinned_requirement
from mlflow.tracking.artifact_utils import _download_artifact_from_uri
from mlflow.models.signature import _infer_signature_from_input_example
from mlflow.types.utils import _infer_pandas_column

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -68,11 +66,11 @@ def get_params(h2o_model):
:param h2o_model: An H2O binary model.
:return: A dictionary of parameters that were used for training the model.
"""
def is_valid(key):
"""
def is_valid(key):
return key != "model_id" and \
not key.endswith("_frame") and \
not key.startswith("keep_cross_validation_")
not key.endswith("_frame") and \
not key.startswith("keep_cross_validation_")

return {key: val for key, val in h2o_model.actual_params.items() if is_valid(key)}

Expand Down Expand Up @@ -123,20 +121,39 @@ def get_input_example(h2o_model, number_of_records=5, relevant_columns_only=True
:param number_of_records: A number of records that will be extracted from the training dataset.
:param relevant_columns_only: A flag indicating whether the output dataset should contain
only columns required by the model. Defaults to ``True``.
:return:
:return: Pandas dataset made from the training dataset of H2O binary model
"""

import h2o
frame = h2o.get_frame(h2o_model.actual_params["training_frame"]).head(number_of_records)
result = frame.as_data_frame()
if relevant_columns_only:
relevant_columns = h2o_model.varimp(use_pandas=True)["variable"].values.tolist()
input_columns = [col for col in frame.col_names if col in relevant_columns]
return result[input_columns]
else:
else:
return result


def _infer_signature(h2o_model, wrapped_model, input_example):
    """
    Assemble a model signature from the H2O model and a sample prediction.

    The input side is produced by ``_get_input_schema(h2o_model)``. The output side
    is derived by scoring ``input_example`` with ``wrapped_model`` and inferring
    a column type for every column of the returned predictions.

    :param h2o_model: An H2O binary model.
    :param wrapped_model: An object exposing ``predict(input_example)``; the result
                          is expected to expose ``columns`` and column lookup by name
                          (presumably a pandas DataFrame - confirm against the wrapper).
    :param input_example: Sample input passed to ``wrapped_model.predict``.
    :return: A ``ModelSignature`` holding the input and output schemas.
    """
    # The input schema is resolved before scoring, as in the original ordering.
    inputs = _get_input_schema(h2o_model)
    predictions = wrapped_model.predict(input_example)

    output_columns = []
    for column_name in predictions.columns:
        column_type = _infer_pandas_column(predictions[column_name])
        output_columns.append(ColSpec(type=column_type, name=column_name))

    return ModelSignature(inputs=inputs, outputs=Schema(output_columns))


def _get_input_schema(h2o_model):
    """
    Build the input schema for an H2O binary model.

    Only columns of the training frame that are listed in the model's variable
    importances are included, in the iteration order of the frame's ``types``
    mapping. Every included column is declared as ``DataType.string``; the H2O
    column type is not consulted.

    :param h2o_model: An H2O binary model. Its ``actual_params["training_frame"]``
                      is used to look up the training frame via ``h2o.get_frame``.
    :return: A ``Schema`` with one string-typed ``ColSpec`` per relevant column.
    """
    import h2o
    training_frame = h2o.get_frame(h2o_model.actual_params["training_frame"])
    # A set makes the per-column membership test constant-time.
    relevant_columns = set(h2o_model.varimp(use_pandas=True)["variable"].values.tolist())
    # Only the column names are needed, so iterate the mapping's keys directly.
    input_columns = [ColSpec(name=key, type=DataType.string)
                     for key in training_frame.types
                     if key in relevant_columns]
    return Schema(input_columns)

def save_model(
h2o_model,
path,
Expand Down Expand Up @@ -170,8 +187,8 @@ def save_model(
--setConvertInvalidNum - Converts invalid numbers to NA
--predictContributions - Returns also Shapley values along with the predictions
--predictCalibrated - Return also calibrated prediction values.
"""
"""

import h2o
model_type_upper = model_type.upper()
if model_type_upper != "MOJO" and model_type_upper != "POJO":
Expand All @@ -194,22 +211,20 @@ def save_model(

if signature is None and input_example is not None:
wrapped_model = _H2OModelWrapper(model_file, model_type, path, extra_prediction_args)
signature = _infer_signature_from_input_example(input_example, wrapped_model)
signature = _infer_signature(h2o_model, wrapped_model, input_example)
elif signature is False:
signature = None

if mlflow_model is None:
mlflow_model = Model()
if signature is not None:
mlflow_model.signature = signature
if input_example is not None:
_save_example(mlflow_model, input_example, path)
if metadata is not None:
mlflow_model.metadata = metadata

pyfunc.add_to_model(
mlflow_model,
loader_module="h2o_mlflow_flavors.h2o_gen_model",
loader_module="h2o_mlflow_flavor",
model_path=model_file,
conda_env=_CONDA_ENV_FILE_NAME,
python_env=_PYTHON_ENV_FILE_NAME,
Expand Down Expand Up @@ -292,10 +307,10 @@ def log_model(
:return: A :py:class:`ModelInfo <mlflow.models.model.ModelInfo>` instance that contains the
metadata of the logged model.
"""
import h2o_mlflow_flavors
import h2o_mlflow_flavor
return Model.log(
artifact_path=artifact_path,
flavor=h2o_mlflow_flavors.h2o_gen_model,
flavor=h2o_mlflow_flavor,
registered_model_name=registered_model_name,
h2o_model=h2o_model,
conda_env=conda_env,
Expand All @@ -305,7 +320,6 @@ def log_model(
pip_requirements=pip_requirements,
extra_pip_requirements=extra_pip_requirements,
model_type=model_type,
metadata=metadata,
extra_prediction_args=extra_prediction_args,
**kwargs,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#
ignore = E241,E265,E302,E303,E701,D105
max-line-length = 120
application-import-names = h2o_mlflow_flavors
application-import-names = h2o_mlflow_flavor
import-order-style = smarkets
inline-quotes = "
Expand Down
12 changes: 5 additions & 7 deletions h2o-py-mlflow-flavors/setup.py → h2o-py-mlflow-flavor/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,30 @@
from setuptools import setup, find_packages
from codecs import open
import os
import sys
import shutil

here = os.path.abspath(os.path.dirname(__file__))

# Get the long description from the relevant file
with open(os.path.join(here, 'DESCRIPTION.rst'), encoding='utf-8') as f:
long_description = f.read()

version = "0.0.local"
version = "0.1.0-SNAPSHOT"
# Get the version from the relevant file
with open(os.path.join(here, 'h2o_mlflow_flavors/version.txt'), encoding='utf-8') as f:
with open(os.path.join(here, 'h2o_mlflow_flavor/version.txt'), encoding='utf-8') as f:
version = f.read()

packages = find_packages(exclude=["tests*"])
print("Found packages: %r" % packages)

setup(
name='h2o_mlflow_flavors',
name='h2o_mlflow_flavor',

# Versions should comply with PEP440. For a discussion on single-sourcing
# the version across setup.py and the project code, see
# https://packaging.python.org/en/latest/single_source_version.html
version = version,

description='Collection of extensions for integration of H2O-3 with H2O.ai Cloud',
description='A mlflow flavor for working with H2O-3 MOJO and POJO models',
long_description=long_description,

# The project's main homepage.
Expand Down Expand Up @@ -68,7 +66,7 @@
"Programming Language :: Python :: 3.11",
],

keywords='machine learning, data mining, statistical analysis, modeling, big data, distributed, parallel',
keywords='ML Flow, H2O-3',

packages=packages,
package_data={"h2o": [
Expand Down
4 changes: 0 additions & 4 deletions h2o-py-mlflow-flavors/DESCRIPTION.rst

This file was deleted.

3 changes: 0 additions & 3 deletions h2o-py-mlflow-flavors/README.md

This file was deleted.

Empty file.
Loading

0 comments on commit d897548

Please sign in to comment.