Skip to content

Commit

Permalink
Fixed external training dataset parameter. Now updates orig_params instead of params
Browse files Browse the repository at this point in the history
  • Loading branch information
stewarthe6 committed Jan 30, 2025
1 parent 65ec4da commit c5481a0
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 1 deletion.
2 changes: 1 addition & 1 deletion atomsci/ddm/pipeline/predict_from_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def predict_from_model_file(model_path, input_df, id_col='compound_id', smiles_c

pipe = mp.create_prediction_pipeline_from_file(pred_params, reload_dir=None, model_path=model_path)
if external_training_data is not None:
pipe.params.dataset_key=external_training_data
pipe.orig_params.dataset_key=external_training_data
pred_df = pipe.predict_full_dataset(input_df, contains_responses=has_responses, is_featurized=is_featurized,
dset_params=pred_params, AD_method=AD_method, k=k, dist_metric=dist_metric,
max_train_records_for_AD=max_train_records_for_AD, AD_return_NN=AD_return_NN)
Expand Down
19 changes: 19 additions & 0 deletions atomsci/ddm/test/integrative/external_dataset/H1_graphconv.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"system": "LC",
"datastore": "False",
"save_results": "False",
"data_owner": "username",
"prediction_type": "regression",
"dataset_key": "../../test_datasets/H1_std.csv",
"id_col": "compound_id",
"smiles_col": "base_rdkit_smiles",
"response_cols": "pKi_mean",
"split_uuid": "002251a2-83f8-4511-acf5-e8bbc5f86677",
"previously_split": "True",
"uncertainty": "True",
"verbose": "True",
"transformers": "True",
"model_type": "NN",
"featurizer": "graphconv",
"result_dir": "./output"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env python

import json
import pandas as pd
import os
import sys

import atomsci.ddm.pipeline.parameter_parser as parse
from atomsci.ddm.pipeline import model_pipeline as mp
from atomsci.ddm.pipeline import predict_from_model as pfm
import pytest

def clean():
    """Remove leftover files from ./output, creating the directory if needed.

    Only regular files directly inside ./output are deleted; subdirectories
    and their contents are left untouched (same behavior as before).
    """
    # exist_ok=True replaces the original listdir()-membership check and
    # avoids the race between checking for the directory and creating it.
    os.makedirs("output", exist_ok=True)
    for name in os.listdir("./output"):
        path = os.path.join("./output", name)
        if os.path.isfile(path):
            os.remove(path)

def test():
    """Integrative test for the external_training_data parameter.

    Trains a graphconv NN model on the H1 dataset, then checks that
    predict_from_model_file (a) raises when external_training_data points to a
    nonexistent file and (b) produces an AD_index column on a normal run.
    """
    # Clean
    # -----
    clean()

    # Load model parameters
    # ---------------------
    with open("H1_graphconv.json", "r") as f:
        hp_params = json.load(f)

    # BUG FIX: the original used parse.__file__.strip("parameter_parser.py"),
    # but str.strip removes any of those *characters* from both string ends,
    # not the suffix string — it only worked here by accident. Two dirname
    # calls (pipeline/ -> ddm/) give the intended directory robustly and
    # portably (no hard-coded "/pipeline/" separator).
    script_dir = os.path.dirname(os.path.dirname(parse.__file__))
    python_path = sys.executable
    hp_params["script_dir"] = script_dir
    hp_params["python_path"] = python_path

    params = parse.wrapper(hp_params)
    # The dataset_key in the JSON is relative; resolve it against script_dir
    # when the file is not found as given.
    if not os.path.isfile(params.dataset_key):
        params.dataset_key = os.path.join(params.script_dir, params.dataset_key)

    train_df = pd.read_csv(params.dataset_key)

    pl = mp.ModelPipeline(params)
    pl.train_model()

    # A nonexistent external training dataset must raise, proving the
    # parameter is actually consulted (via orig_params) during prediction.
    with pytest.raises(Exception, match='Dataset file file_does_not_exist.csv does not exist'):
        pred_df_file = pfm.predict_from_model_file(model_path=pl.params.model_tarball_path,
            input_df=train_df[:10],
            id_col="compound_id",
            smiles_col="base_rdkit_smiles",
            response_col="pKi_mean",
            dont_standardize=True,
            AD_method="z_score",
            external_training_data='file_does_not_exist.csv')

    # Without external_training_data the prediction should succeed and
    # include the applicability-domain index column.
    pred_df_file = pfm.predict_from_model_file(model_path=pl.params.model_tarball_path,
        input_df=train_df[:10],
        id_col="compound_id",
        smiles_col="base_rdkit_smiles",
        response_col="pKi_mean",
        dont_standardize=True,
        AD_method="z_score")
    assert("AD_index" in pred_df_file.columns.values), 'Error: No AD_index column in pred_df_file'

    clean()

if __name__ == '__main__':
    # Allow running this integrative test directly as a script
    # (outside of the pytest collector).
    test()

0 comments on commit c5481a0

Please sign in to comment.