Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

454 reorganize pipeline decision treepy script #549

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions scripts/feedback.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"mw_saxs": 58.15,
"mw_model": 59.5,
"mw_err": 0.0,
"best_model": "multi_state_model_2_1_1.dat",
"overall_chi_square": 1.86,
"q_ranges": [
0.009046,
0.1,
0.2,
0.36925301
],
"chi_squares_of_regions": [
2.65,
1.86,
1.44
],
"residuals_of_regions": [
-0.086,
0.003,
0.058
],
"mw_feedback": "The difference between the model MW (59.5) and the SAXS MW (58.15) is within acceptable error (2.3%).",
"overall_chi_square_feedback": "The overall chi-square of this fit is 1.86. Moderate.",
"highest_chi_square_feedback": "The chi-square is highest (2.65) in the region where (0.01 < q < 0.1).",
"second_highest_chi_square_feedback": "The 2nd highest chi-square (1.86) is in the region where 0.1 < q < 0.2, but this is okay.",
"regional_chi_square_feedback": "The overall structure of the PDB model needs improvement. This may come from a sequence that is off, or the presence of oligomerization states in the sample."
}
60 changes: 11 additions & 49 deletions scripts/pipeline_decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,58 +131,20 @@ def best_chi_square_i(cs_models, multi_state_models):
Selects the best chi-square from all multi-state files in the input folder.

Returns the index of the best chi-square in cs_models.

Chooses the first chi-square with less than 20% error compared to the next value.
If no such chi-square exists, returns the index of the minimum chi-square.
["1.7", "1.8", "1.9", "2.5"]
["multi_state_model_1_1_1.dat", "multi_state_model_2_1_1.dat"]
Selects the highest cs that is <= 2. If they are all > 2, select the lowest.
"""
print_debug("Comparing chi-squares of all multistates")

# Round and print for debug purposes
cs_models_rounded = [round(cs, 2) for cs in cs_models]
print_debug(cs_models_rounded)

csm_err_threshold = 0.2

# Handle single value case immediately
if len(cs_models) == 1:
return 0

# Iterate over pairs of chi-square values
for _, (cs_current, cs_next) in enumerate(zip(cs_models, cs_models[1:])):
csm_err = abs(cs_next - cs_current) / cs_current

if csm_err <= csm_err_threshold:
return _log_and_return_best(cs_current, cs_models, multi_state_models)

# If no chi-square meets the threshold, return the smallest one
best_cs = min(cs_models)
return _log_and_return_best(best_cs, cs_models, multi_state_models)


def _log_and_return_best(best_cs, cs_models, multi_state_models):
    """
    Report the chosen chi-square through print_debug and return its index.

    The index is the position of the first entry in cs_models equal to
    best_cs; the same position is used to pick the matching entry of
    multi_state_models, whose model number is included in the debug message.
    Raises ValueError (from list.index) if best_cs is not in cs_models.
    """
    chosen_idx = cs_models.index(best_cs)

    # The model number is only used to label the debug message.
    state_count = _extract_model_number(multi_state_models[chosen_idx])

    message = (
        f"The best chi-square is {round(best_cs, 2)} "
        f"({state_count} multi states)"
    )
    print_debug(message)
    return chosen_idx

best_cs = cs_models[0]
elif any (cs < 2 for cs in cs_models):
best_cs = max((cs for cs in cs_models if cs < 2))
else:
best_cs = min(cs_models)

def _extract_model_number(filename):
"""
Extracts the multi-state model number from the filename.
Assumes the pattern 'multi_state_model_#'.
"""
start = filename.find("multi_state_model_") + 18
return filename[start]
return best_cs


def calculate_regional_chi_square_values(
Expand Down Expand Up @@ -629,7 +591,7 @@ def handle_poor_fit(
if second_highest_chi_square_flag == "mid_q_err":
feedback += (
" The movement of flexible regions in the model also do not seem to "
"improve the fitting."
"improve the fit."
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

excellent catch

)
elif second_highest_chi_square_flag == "high_q_err":
feedback += " There are also background subtraction problems."
Expand All @@ -655,7 +617,7 @@ def handle_poor_fit(
elif second_highest_chi_square_flag == "mid_q_err":
feedback += (
" The movement of flexible regions in the model also do not seem to "
"improve the fitting."
"improve the fit."
)
else:
feedback = (
Expand Down
Loading
Loading