From f1055f23d6cf810c7b1106a0cb80ea5a01aee6bb Mon Sep 17 00:00:00 2001
From: hhunterzinck <haley.hunterzinck@sagebase.org>
Date: Fri, 28 Jan 2022 03:11:02 +0000
Subject: [PATCH 1/6] add validation script for BPC to cBioPortal mapping file

---
 validation/config.yaml     |  50 +++++++++
 validation/validate_map.py | 202 +++++++++++++++++++++++++++++++++++++
 2 files changed, 252 insertions(+)
 create mode 100644 validation/config.yaml
 create mode 100644 validation/validate_map.py

diff --git a/validation/config.yaml b/validation/config.yaml
new file mode 100644
index 0000000..5a66041
--- /dev/null
+++ b/validation/config.yaml
@@ -0,0 +1,50 @@
+dataset:
+  'Cancer-level dataset':
+    id: syn22296816  
+    file: ca_dx_derived.csv
+  'Patient-level dataset':
+    id: syn22296817
+    file: pt_derived.csv
+  'Regimen-Cancer level dataset':
+    id: syn22296818
+    file: ca_drugs_derived.csv
+  'Imaging-level dataset':
+    id: syn22296819
+    file: prissmm_image_derived.csv
+  'Pathology-report level dataset':
+    id: syn22296820
+    file: prissmm_path_derived.csv
+  'Med Onc Note level dataset':
+    id: syn22296822
+    file: prissmm_md_derived.csv
+  'Cancer panel test level dataset':
+    id: syn22296823
+    file: cpt_derived.csv
+  'Cancer-level index dataset': 
+    id: syn22314486
+    file: ca_dx_derived_index.csv
+  'Cancer-level non-index dataset': 
+    id: syn22314497
+    file: ca_dx_derived_non_index.csv
+  'Hemeonc dataset':
+    id: syn23561688
+    file: hemonc_mapping_cbio.csv
+  'PRISSMM Tumor Marker level dataset':
+    id: syn23561700
+    file: prissmm_tm_derived.csv
+  'Cancer-Directed Radiation Therapy dataset':
+    id: syn25931923  
+    file: ca_radtx_derived.csv
+check:
+  1: 
+    function: _check_code_name_empty
+    implemented: 1
+    deprecated: 0
+    description: Code is empty.
+    request: Please remove the row or fill in the code name. 
+  2: 
+    function: _check_code_name_absent
+    implemented: 1
+    deprecated: 0
+    description: Code does not exist in associated dataset.
+    request: Please check the code name and associated dataset. 
\ No newline at end of file
diff --git a/validation/validate_map.py b/validation/validate_map.py
new file mode 100644
index 0000000..4f68438
--- /dev/null
+++ b/validation/validate_map.py
@@ -0,0 +1,202 @@
+'''
+Description: Validate the BPC to cBioPortal mapping file. 
+Author: Haley Hunter-Zinck
+Date: 2022-01-27
+'''
+
+import argparse
+import logging
+import pandas as pd
+import yaml
+import logging
+import re
+
+import synapseclient
+from synapseclient import Synapse
+from synapseclient.core.exceptions import (
+    SynapseAuthenticationError,
+    SynapseNoCredentialsError,
+)
+
+def _check_code_name_empty(df: pd.DataFrame, syn: Synapse, config: dict) -> list:
+  """Check for any code that is empty.
+     Args:
+      df: dataframe representing map
+      syn: Synapse object
+      config: configuration parameters
+    Returns:
+        dataframe with metadata on any empty codes.
+  """
+  empty = df.loc[pd.isna(df['code'])]['code']
+  return(list(empty))
+  
+def _check_code_name_absent(df: pd.DataFrame, syn: Synapse, config: dict) -> list:
+  """Check for any code that is not code name that
+    does not appear in its associated data file.  
+    Args:
+        df: dataframe representing map
+        syn: Synapse object
+        config: configuration parameters
+    Returns:
+        dataframe with metadata on any missing codes.
+  """
+  absent = []
+  for dataset in config['dataset']:
+    data = pd.read_csv(syn.get(config['dataset'][dataset]['id'])['path'], low_memory=False)
+    code_data = data.columns
+    
+    # get codes associated with the dataset and of types derived or curated
+    code_map = list(df.loc[((df['dataset'] == dataset) & 
+                        ((df['data_type'].str.lower() == 'derived') | (df['data_type'].str.lower() == 'curated')))]['code'])
+                        
+    # do not check wildcard code names or NA code names
+    code_remove = []
+    for code in code_map:
+      if bool(re.match(r'^.+[*]$', str(code).strip())):
+        code_remove.append(code)
+      elif pd.isna(code):
+        code_remove.append(code)
+    for code in code_remove:
+      code_map.remove(code)
+    
+    absent.extend(list(set(code_map) - set(code_data)))
+  return(absent)
+
+
+def _format_result(codes: list, config: dict, check_no: int):
+  """Format output for interpretable log file.  
+    Args:
+        df: dataframe representing map
+        config: configuration parameters
+        check_no: check number for which to format results
+    Returns:
+        dataframe with additional metadata on any errors.
+  """
+  formatted = pd.DataFrame()
+  formatted['code'] = codes
+  formatted['check_no'] = str(check_no)
+  formatted['description'] = config['check'][check_no]['description']
+  formatted['action'] = config['check'][check_no]['request']
+  return(formatted)
+  
+def validate_map(synapse_id: str, syn: Synapse, config: dict, version: int) -> pd.DataFrame:
+  """Run all implemented checks on mapping file.  
+    Args:
+        synapse_id: Synapse ID of mapping file
+        syn: Synapse object
+        config: configuration parameters
+        version: Version number of Synapse ID
+    Returns:
+        dataframe with additional metadata on any errors.
+  """
+
+  errors = pd.DataFrame()
+  df = pd.DataFrame()
+  if (version == 'None'):
+    df = pd.read_csv(syn.get(synapse_id)['path'])
+  else:
+    df = pd.read_csv(syn.get(synapse_id, version=version)['path'])
+  
+  for check_no in config['check']:
+    
+    logging.info(f'Check {check_no}...')
+    
+    if (config['check'][check_no]['implemented'] and not config['check'][check_no]['deprecated']): 
+      function_name = config['check'][check_no]['function']
+      result = eval(function_name + "(df, syn, config)")
+      errors = errors.append(_format_result(result, config, check_no))
+      logging.info(f'  Found {errors.shape[0]} error(s).')
+    else:
+      logging.info('  Check deprecated or not implemented.')
+  
+  errors.insert(0, 'issue', range(1, errors.shape[0] + 1, 1))
+  
+  return(errors)
+
+def build_parser():
+  parser = argparse.ArgumentParser(
+      description="Checks validity of BPC to cBioPortal mapping file "
+  )
+  parser.add_argument(
+      "synapse_id",
+      metavar="SYNAPSE_ID",
+      type=str,
+      help="Synapse ID of mapping file",
+  )
+  parser.add_argument(
+        "--version",
+        "-v",
+        metavar="VERSION",
+        type=str,
+        default="None",
+        help="Synapse entity version number " "(default: current)",
+  )
+  parser.add_argument(
+        "--outfile",
+        "-o",
+        metavar="OUTFILE",
+        type=str,
+        default="output.csv",
+        help="Name of output file " "(default: %(default)s)",
+  )
+  parser.add_argument(
+        "--log",
+        "-l",
+        metavar="LEVEL",
+        type=str,
+        choices=["debug", "info", "warning", "error"],
+        default="error",
+        help="Set logging output level " "(default: %(default)s)",
+    )
+  return parser
+  
+  
+def read_config(file: str) -> dict:
+  config = None
+  with open(file, "r") as stream:
+    try:
+        config = yaml.safe_load(stream)
+    except yaml.YAMLError as exc:
+        print(exc)
+  return(config)
+
+
+def synapse_login(synapse_config=synapseclient.client.CONFIG_FILE):
+    """Login to Synapse
+    Args:
+        synapse_config: Path to synapse configuration file.
+                        Defaults to ~/.synapseConfig
+    Returns:
+        Synapse connection
+    """
+    try:
+        syn = synapseclient.Synapse(skip_checks=True, configPath=synapse_config)
+        syn.login(silent=True)
+    except (SynapseNoCredentialsError, SynapseAuthenticationError):
+        raise ValueError(
+            "Login error: please make sure you have correctly "
+            "configured your client.  Instructions here: "
+            "https://help.synapse.org/docs/Client-Configuration.1985446156.html.  "
+            "You can also create a Synapse Personal Access Token and set it "
+            "as an environmental variable: "
+            "SYNAPSE_AUTH_TOKEN='<my_personal_access_token>'"
+        )
+    return syn
+
+  
+def main():
+
+    args = build_parser().parse_args()
+    config = read_config('config.yaml')
+    syn = synapse_login()
+
+    numeric_level = getattr(logging, args.log.upper(), None)
+    if not isinstance(numeric_level, int):
+        raise ValueError("Invalid log level: %s" % args.log)
+    logging.basicConfig(level=numeric_level)
+    
+    res = validate_map(args.synapse_id, syn, config, args.version)
+    res.to_csv(args.outfile, index=False)
+
+if __name__ == "__main__":
+    main()

From 8ad4a3260b9c3601f04688a2f082bb238762b8bc Mon Sep 17 00:00:00 2001
From: Haley Hunter-Zinck <17149604+hhunterzinck@users.noreply.github.com>
Date: Fri, 28 Jan 2022 08:36:21 -0800
Subject: [PATCH 2/6] Update validation/validate_map.py

Co-authored-by: Thomas Yu <thomas.yu@sagebase.org>
---
 validation/validate_map.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/validation/validate_map.py b/validation/validate_map.py
index 4f68438..44b2dfc 100644
--- a/validation/validate_map.py
+++ b/validation/validate_map.py
@@ -6,17 +6,17 @@
 
 import argparse
 import logging
-import pandas as pd
-import yaml
-import logging
 import re
 
+import pandas as pd
 import synapseclient
 from synapseclient import Synapse
 from synapseclient.core.exceptions import (
     SynapseAuthenticationError,
     SynapseNoCredentialsError,
 )
+import yaml
+
 
 def _check_code_name_empty(df: pd.DataFrame, syn: Synapse, config: dict) -> list:
   """Check for any code that is empty.

From 093fb354b4f89f237458e4b341a6312f1bedfbe9 Mon Sep 17 00:00:00 2001
From: hhunterzinck <haley.hunterzinck@sagebase.org>
Date: Fri, 28 Jan 2022 16:40:03 +0000
Subject: [PATCH 3/6] add double newline between functions and format with black

---
 validation/validate_map.py | 204 ++++++++++++++++++++-----------------
 1 file changed, 112 insertions(+), 92 deletions(-)

diff --git a/validation/validate_map.py b/validation/validate_map.py
index 44b2dfc..349a7bc 100644
--- a/validation/validate_map.py
+++ b/validation/validate_map.py
@@ -1,8 +1,8 @@
-'''
+"""
 Description: Validate the BPC to cBioPortal mapping file. 
 Author: Haley Hunter-Zinck
 Date: 2022-01-27
-'''
+"""
 
 import argparse
 import logging
@@ -19,68 +19,83 @@
 
 
 def _check_code_name_empty(df: pd.DataFrame, syn: Synapse, config: dict) -> list:
-  """Check for any code that is empty.
+    """Check for any code that is empty.
      Args:
       df: dataframe representing map
       syn: Synapse object
       config: configuration parameters
     Returns:
         dataframe with metadata on any empty codes.
-  """
-  empty = df.loc[pd.isna(df['code'])]['code']
-  return(list(empty))
-  
+    """
+    empty = df.loc[pd.isna(df["code"])]["code"]
+    return list(empty)
+
+
 def _check_code_name_absent(df: pd.DataFrame, syn: Synapse, config: dict) -> list:
-  """Check for any code that is not code name that
-    does not appear in its associated data file.  
+    """Check for any code that is not code name that
+    does not appear in its associated data file.
     Args:
         df: dataframe representing map
         syn: Synapse object
         config: configuration parameters
     Returns:
         dataframe with metadata on any missing codes.
-  """
-  absent = []
-  for dataset in config['dataset']:
-    data = pd.read_csv(syn.get(config['dataset'][dataset]['id'])['path'], low_memory=False)
-    code_data = data.columns
-    
-    # get codes associated with the dataset and of types derived or curated
-    code_map = list(df.loc[((df['dataset'] == dataset) & 
-                        ((df['data_type'].str.lower() == 'derived') | (df['data_type'].str.lower() == 'curated')))]['code'])
-                        
-    # do not check wildcard code names or NA code names
-    code_remove = []
-    for code in code_map:
-      if bool(re.match(r'^.+[*]$', str(code).strip())):
-        code_remove.append(code)
-      elif pd.isna(code):
-        code_remove.append(code)
-    for code in code_remove:
-      code_map.remove(code)
-    
-    absent.extend(list(set(code_map) - set(code_data)))
-  return(absent)
+    """
+    absent = []
+    for dataset in config["dataset"]:
+        data = pd.read_csv(
+            syn.get(config["dataset"][dataset]["id"])["path"], low_memory=False
+        )
+        code_data = data.columns
+
+        # get codes associated with the dataset and of types derived or curated
+        code_map = list(
+            df.loc[
+                (
+                    (df["dataset"] == dataset)
+                    & (
+                        (df["data_type"].str.lower() == "derived")
+                        | (df["data_type"].str.lower() == "curated")
+                    )
+                )
+            ]["code"]
+        )
+
+        # do not check wildcard code names or NA code names
+        code_remove = []
+        for code in code_map:
+            if bool(re.match(r"^.+[*]$", str(code).strip())):
+                code_remove.append(code)
+            elif pd.isna(code):
+                code_remove.append(code)
+        for code in code_remove:
+            code_map.remove(code)
+
+        absent.extend(list(set(code_map) - set(code_data)))
+    return absent
 
 
 def _format_result(codes: list, config: dict, check_no: int):
-  """Format output for interpretable log file.  
+    """Format output for interpretable log file.
     Args:
         df: dataframe representing map
         config: configuration parameters
         check_no: check number for which to format results
     Returns:
         dataframe with additional metadata on any errors.
-  """
-  formatted = pd.DataFrame()
-  formatted['code'] = codes
-  formatted['check_no'] = str(check_no)
-  formatted['description'] = config['check'][check_no]['description']
-  formatted['action'] = config['check'][check_no]['request']
-  return(formatted)
-  
-def validate_map(synapse_id: str, syn: Synapse, config: dict, version: int) -> pd.DataFrame:
-  """Run all implemented checks on mapping file.  
+    """
+    formatted = pd.DataFrame()
+    formatted["code"] = codes
+    formatted["check_no"] = str(check_no)
+    formatted["description"] = config["check"][check_no]["description"]
+    formatted["action"] = config["check"][check_no]["request"]
+    return formatted
+
+
+def validate_map(
+    synapse_id: str, syn: Synapse, config: dict, version: int
+) -> pd.DataFrame:
+    """Run all implemented checks on mapping file.
     Args:
         synapse_id: Synapse ID of mapping file
         syn: Synapse object
@@ -88,58 +103,62 @@ def validate_map(synapse_id: str, syn: Synapse, config: dict, version: int) -> p
         version: Version number of Synapse ID
     Returns:
         dataframe with additional metadata on any errors.
-  """
-
-  errors = pd.DataFrame()
-  df = pd.DataFrame()
-  if (version == 'None'):
-    df = pd.read_csv(syn.get(synapse_id)['path'])
-  else:
-    df = pd.read_csv(syn.get(synapse_id, version=version)['path'])
-  
-  for check_no in config['check']:
-    
-    logging.info(f'Check {check_no}...')
-    
-    if (config['check'][check_no]['implemented'] and not config['check'][check_no]['deprecated']): 
-      function_name = config['check'][check_no]['function']
-      result = eval(function_name + "(df, syn, config)")
-      errors = errors.append(_format_result(result, config, check_no))
-      logging.info(f'  Found {errors.shape[0]} error(s).')
+    """
+
+    errors = pd.DataFrame()
+    df = pd.DataFrame()
+    if version == "None":
+        df = pd.read_csv(syn.get(synapse_id)["path"])
     else:
-      logging.info('  Check deprecated or not implemented.')
-  
-  errors.insert(0, 'issue', range(1, errors.shape[0] + 1, 1))
-  
-  return(errors)
+        df = pd.read_csv(syn.get(synapse_id, version=version)["path"])
+
+    for check_no in config["check"]:
+
+        logging.info(f"Check {check_no}...")
+
+        if (
+            config["check"][check_no]["implemented"]
+            and not config["check"][check_no]["deprecated"]
+        ):
+            function_name = config["check"][check_no]["function"]
+            result = eval(function_name + "(df, syn, config)")
+            errors = errors.append(_format_result(result, config, check_no))
+            logging.info(f"  Found {errors.shape[0]} error(s).")
+        else:
+            logging.info("  Check deprecated or not implemented.")
+
+    errors.insert(0, "issue", range(1, errors.shape[0] + 1, 1))
+
+    return errors
+
 
 def build_parser():
-  parser = argparse.ArgumentParser(
-      description="Checks validity of BPC to cBioPortal mapping file "
-  )
-  parser.add_argument(
-      "synapse_id",
-      metavar="SYNAPSE_ID",
-      type=str,
-      help="Synapse ID of mapping file",
-  )
-  parser.add_argument(
+    parser = argparse.ArgumentParser(
+        description="Checks validity of BPC to cBioPortal mapping file "
+    )
+    parser.add_argument(
+        "synapse_id",
+        metavar="SYNAPSE_ID",
+        type=str,
+        help="Synapse ID of mapping file",
+    )
+    parser.add_argument(
         "--version",
         "-v",
         metavar="VERSION",
         type=str,
         default="None",
         help="Synapse entity version number " "(default: current)",
-  )
-  parser.add_argument(
+    )
+    parser.add_argument(
         "--outfile",
         "-o",
         metavar="OUTFILE",
         type=str,
         default="output.csv",
         help="Name of output file " "(default: %(default)s)",
-  )
-  parser.add_argument(
+    )
+    parser.add_argument(
         "--log",
         "-l",
         metavar="LEVEL",
@@ -148,17 +167,17 @@ def build_parser():
         default="error",
         help="Set logging output level " "(default: %(default)s)",
     )
-  return parser
-  
-  
+    return parser
+
+
 def read_config(file: str) -> dict:
-  config = None
-  with open(file, "r") as stream:
-    try:
-        config = yaml.safe_load(stream)
-    except yaml.YAMLError as exc:
-        print(exc)
-  return(config)
+    config = None
+    with open(file, "r") as stream:
+        try:
+            config = yaml.safe_load(stream)
+        except yaml.YAMLError as exc:
+            print(exc)
+    return config
 
 
 def synapse_login(synapse_config=synapseclient.client.CONFIG_FILE):
@@ -183,20 +202,21 @@ def synapse_login(synapse_config=synapseclient.client.CONFIG_FILE):
         )
     return syn
 
-  
+
 def main():
 
     args = build_parser().parse_args()
-    config = read_config('config.yaml')
+    config = read_config("config.yaml")
     syn = synapse_login()
 
     numeric_level = getattr(logging, args.log.upper(), None)
     if not isinstance(numeric_level, int):
         raise ValueError("Invalid log level: %s" % args.log)
     logging.basicConfig(level=numeric_level)
-    
+
     res = validate_map(args.synapse_id, syn, config, args.version)
     res.to_csv(args.outfile, index=False)
 
+
 if __name__ == "__main__":
     main()

From 759a21db0f0464e43f9d0c8b3523feb7a6b9f596 Mon Sep 17 00:00:00 2001
From: hhunterzinck <haley.hunterzinck@sagebase.org>
Date: Sat, 29 Jan 2022 01:23:08 +0000
Subject: [PATCH 4/6] add function map and remove eval call

---
 validation/validate_map.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/validation/validate_map.py b/validation/validate_map.py
index 349a7bc..150a98f 100644
--- a/validation/validate_map.py
+++ b/validation/validate_map.py
@@ -75,7 +75,7 @@ def _check_code_name_absent(df: pd.DataFrame, syn: Synapse, config: dict) -> lis
     return absent
 
 
-def _format_result(codes: list, config: dict, check_no: int):
+def _format_result(codes: list, config: dict, check_no: int) -> pd.DataFrame:
     """Format output for interpretable log file.
     Args:
         df: dataframe representing map
@@ -92,6 +92,14 @@ def _format_result(codes: list, config: dict, check_no: int):
     return formatted
 
 
+def _create_function_map() -> dict:
+  fxns = {
+  "_check_code_name_absent": _check_code_name_absent,
+  "_check_code_name_empty": _check_code_name_empty
+  }
+  return fxns
+
+
 def validate_map(
     synapse_id: str, syn: Synapse, config: dict, version: int
 ) -> pd.DataFrame:
@@ -107,6 +115,7 @@ def validate_map(
 
     errors = pd.DataFrame()
     df = pd.DataFrame()
+    fxns = _create_function_map()
     if version == "None":
         df = pd.read_csv(syn.get(synapse_id)["path"])
     else:
@@ -120,8 +129,8 @@ def validate_map(
             config["check"][check_no]["implemented"]
             and not config["check"][check_no]["deprecated"]
         ):
-            function_name = config["check"][check_no]["function"]
-            result = eval(function_name + "(df, syn, config)")
+            fxn_name = config["check"][check_no]["function"]
+            result = fxns[fxn_name](df, syn, config)
             errors = errors.append(_format_result(result, config, check_no))
             logging.info(f"  Found {errors.shape[0]} error(s).")
         else:
@@ -161,7 +170,6 @@ def build_parser():
     parser.add_argument(
         "--log",
         "-l",
-        metavar="LEVEL",
         type=str,
         choices=["debug", "info", "warning", "error"],
         default="error",

From 1730e6bddb036e7b77652dcd9ff10bcee7729c26 Mon Sep 17 00:00:00 2001
From: hhunterzinck <haley.hunterzinck@sagebase.org>
Date: Sat, 29 Jan 2022 01:55:28 +0000
Subject: [PATCH 5/6] add test to check function map against functions listed in config file

---
 validation/config.yaml      |  4 ++--
 validation/test_validate.py | 18 ++++++++++++++++++
 validation/validate_map.py  | 24 +++++++++++++-----------
 3 files changed, 33 insertions(+), 13 deletions(-)
 create mode 100644 validation/test_validate.py

diff --git a/validation/config.yaml b/validation/config.yaml
index 5a66041..14969fb 100644
--- a/validation/config.yaml
+++ b/validation/config.yaml
@@ -37,13 +37,13 @@ dataset:
     file: ca_radtx_derived.csv
 check:
   1: 
-    function: _check_code_name_empty
+    function: check_code_name_empty
     implemented: 1
     deprecated: 0
     description: Code is empty.
     request: Please remove the row or fill in the code name. 
   2: 
-    function: _check_code_name_absent
+    function: check_code_name_absent
     implemented: 1
     deprecated: 0
     description: Code does not exist in associated dataset.
diff --git a/validation/test_validate.py b/validation/test_validate.py
new file mode 100644
index 0000000..531fb95
--- /dev/null
+++ b/validation/test_validate.py
@@ -0,0 +1,18 @@
+"""Test validate map"""
+import yaml
+
+from validate_map import *
+
+
+def test__function_map():
+    """Test that all functions referenced in the config file are listed in the function map."""
+    config = read_config("config.yaml")
+    fxn_map = create_function_map()
+
+    fxn_config = []
+    for check in config["check"]:
+        fxn_config.append(config["check"][check]["function"])
+
+    config_not_map = set(fxn_config) - set(fxn_map.keys())
+
+    assert len(config_not_map) == 0
diff --git a/validation/validate_map.py b/validation/validate_map.py
index 150a98f..f6068a2 100644
--- a/validation/validate_map.py
+++ b/validation/validate_map.py
@@ -18,7 +18,7 @@
 import yaml
 
 
-def _check_code_name_empty(df: pd.DataFrame, syn: Synapse, config: dict) -> list:
+def check_code_name_empty(df: pd.DataFrame, syn: Synapse, config: dict) -> list:
     """Check for any code that is empty.
      Args:
       df: dataframe representing map
@@ -31,7 +31,7 @@ def _check_code_name_empty(df: pd.DataFrame, syn: Synapse, config: dict) -> list
     return list(empty)
 
 
-def _check_code_name_absent(df: pd.DataFrame, syn: Synapse, config: dict) -> list:
+def check_code_name_absent(df: pd.DataFrame, syn: Synapse, config: dict) -> list:
     """Check for any code that is not code name that
     does not appear in its associated data file.
     Args:
@@ -75,7 +75,7 @@ def _check_code_name_absent(df: pd.DataFrame, syn: Synapse, config: dict) -> lis
     return absent
 
 
-def _format_result(codes: list, config: dict, check_no: int) -> pd.DataFrame:
+def format_result(codes: list, config: dict, check_no: int) -> pd.DataFrame:
     """Format output for interpretable log file.
     Args:
         df: dataframe representing map
@@ -92,12 +92,12 @@ def _format_result(codes: list, config: dict, check_no: int) -> pd.DataFrame:
     return formatted
 
 
-def _create_function_map() -> dict:
-  fxns = {
-  "_check_code_name_absent": _check_code_name_absent,
-  "_check_code_name_empty": _check_code_name_empty
-  }
-  return fxns
+def create_function_map() -> dict:
+    fxns = {
+        "check_code_name_absent": check_code_name_absent,
+        "check_code_name_empty": check_code_name_empty,
+    }
+    return fxns
 
 
 def validate_map(
@@ -115,7 +115,7 @@ def validate_map(
 
     errors = pd.DataFrame()
     df = pd.DataFrame()
-    fxns = _create_function_map()
+    fxns = create_function_map()
     if version == "None":
         df = pd.read_csv(syn.get(synapse_id)["path"])
     else:
@@ -131,7 +131,7 @@ def validate_map(
         ):
             fxn_name = config["check"][check_no]["function"]
             result = fxns[fxn_name](df, syn, config)
-            errors = errors.append(_format_result(result, config, check_no))
+            errors = errors.append(format_result(result, config, check_no))
             logging.info(f"  Found {errors.shape[0]} error(s).")
         else:
             logging.info("  Check deprecated or not implemented.")
@@ -225,6 +225,8 @@ def main():
     res = validate_map(args.synapse_id, syn, config, args.version)
     res.to_csv(args.outfile, index=False)
 
+    logging.info(f"Output written to '{args.outfile}'")
+
 
 if __name__ == "__main__":
     main()

From d6fc1340d39d8e35e819d8dd2963ed3548fa9f51 Mon Sep 17 00:00:00 2001
From: Haley Hunter-Zinck <haley.hunterzinck@sagebase.org>
Date: Fri, 28 Jan 2022 18:37:31 -0800
Subject: [PATCH 6/6] rename directory validation to scripts

---
 {validation => scripts}/config.yaml      | 0
 {validation => scripts}/test_validate.py | 0
 {validation => scripts}/validate_map.py  | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename {validation => scripts}/config.yaml (100%)
 rename {validation => scripts}/test_validate.py (100%)
 rename {validation => scripts}/validate_map.py (100%)

diff --git a/validation/config.yaml b/scripts/config.yaml
similarity index 100%
rename from validation/config.yaml
rename to scripts/config.yaml
diff --git a/validation/test_validate.py b/scripts/test_validate.py
similarity index 100%
rename from validation/test_validate.py
rename to scripts/test_validate.py
diff --git a/validation/validate_map.py b/scripts/validate_map.py
similarity index 100%
rename from validation/validate_map.py
rename to scripts/validate_map.py