Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update DiagnosticReport to calculate base correctness of synthetic data #496

Merged
merged 7 commits into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 0 additions & 151 deletions sdmetrics/reports/_results_handler.py

This file was deleted.

32 changes: 30 additions & 2 deletions sdmetrics/reports/base_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ def __init__(self):
self._overall_score = None
self.is_generated = False
self._properties = {}
self._results_handler = None
self.report_info = {
'report_type': self.__class__.__name__,
'generated_date': None,
Expand Down Expand Up @@ -91,6 +90,25 @@ def convert_datetimes(real_data, synthetic_data, metadata):
except Exception:
continue

def _print_results(self, verbose):
    """Write the overall score and every property score to standard output.

    Each score is multiplied by 100, rounded to two decimal places and shown
    as a percentage. Nothing is written when ``verbose`` is falsy.

    Args:
        verbose (bool):
            Whether or not to print the results to stdout.
    """
    if not verbose:
        return

    overall_percent = round(self._overall_score * 100, 2)
    sys.stdout.write(f'\nOverall Score: {overall_percent}%\n\n')
    sys.stdout.write('Properties:\n')

    # One line per property, in the insertion order of ``self._properties``.
    for name, prop in self._properties.items():
        percent = round(prop._compute_average() * 100, 2)
        sys.stdout.write(f'- {name}: {percent}%\n')

def generate(self, real_data, synthetic_data, metadata, verbose=True):
"""Generate report.

Expand Down Expand Up @@ -152,7 +170,7 @@ def generate(self, real_data, synthetic_data, metadata, verbose=True):
end_time = time.time()
self.report_info['generation_time'] = end_time - start_time

self._handle_results(verbose)
self._print_results(verbose)

def _check_property_name(self, property_name):
"""Check that the given property name is valid.
Expand All @@ -168,6 +186,16 @@ def _check_property_name(self, property_name):
f" Valid property names are '{valid_property_names}'."
)

def get_score(self):
    """Return the overall score stored on the report.

    ``_check_report_generated`` is called first; presumably it raises when
    the report has not been generated yet (its body is not shown here).

    Returns:
        float
            The overall score.
    """
    self._check_report_generated()
    overall_score = self._overall_score
    return overall_score

def get_info(self):
    """Return a deep copy of the report's ``report_info`` dictionary.

    Because the copy is deep, changes made to the returned object do not
    affect the information stored on the report.
    """
    info_copy = deepcopy(self.report_info)
    return info_copy
Expand Down
27 changes: 5 additions & 22 deletions sdmetrics/reports/multi_table/diagnostic_report.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,19 @@
"""Multi table diagnostic report."""
from copy import deepcopy

from sdmetrics.reports._results_handler import DiagnosticReportResultsHandler
from sdmetrics.reports.multi_table._properties import Boundary, Coverage, Synthesis
from sdmetrics.reports.multi_table._properties import DataValidity, RelationshipValidity, Structure
from sdmetrics.reports.multi_table.base_multi_table_report import BaseMultiTableReport


class DiagnosticReport(BaseMultiTableReport):
"""Multi table diagnostic report.

This class creates a diagnostic report for multi-table data. It calculates the diagnostic
score along three properties - Synthesis, Coverage, and Boundary.
score along three properties - Relationship Validity, Data Structure, and Data Validity.
"""

def __init__(self):
super().__init__()
self._properties = {
'Coverage': Coverage(),
'Boundary': Boundary(),
'Synthesis': Synthesis()
'Data Validity': DataValidity(),
'Data Structure': Structure(),
'Relationship Validity': RelationshipValidity()
}
self._results_handler = DiagnosticReportResultsHandler()

def _handle_results(self, verbose):
self._results_handler.print_results(self._properties, verbose)

def get_results(self):
"""Return the diagnostic results.

Returns:
dict
The diagnostic results.
"""
self._check_report_generated()
return deepcopy(self._results_handler.results)
15 changes: 0 additions & 15 deletions sdmetrics/reports/multi_table/quality_report.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""Multi table quality report."""
from sdmetrics.reports._results_handler import QualityReportResultsHandler
from sdmetrics.reports.multi_table._properties import (
Cardinality, ColumnPairTrends, ColumnShapes, InterTableTrends)
from sdmetrics.reports.multi_table.base_multi_table_report import BaseMultiTableReport
Expand All @@ -20,17 +19,3 @@ def __init__(self):
'Cardinality': Cardinality(),
'Intertable Trends': InterTableTrends()
}
self._results_handler = QualityReportResultsHandler()

def _handle_results(self, verbose):
self._results_handler.print_results(self._properties, self._overall_score, verbose)

def get_score(self):
"""Return the overall quality score.

Returns:
float
The overall quality score.
"""
self._check_report_generated()
return self._overall_score
29 changes: 4 additions & 25 deletions sdmetrics/reports/single_table/diagnostic_report.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,18 @@
"""Single table diagnostic report."""
import logging
from copy import deepcopy

from sdmetrics.reports._results_handler import DiagnosticReportResultsHandler
from sdmetrics.reports.base_report import BaseReport
from sdmetrics.reports.single_table._properties import Boundary, Coverage, Synthesis

LOGGER = logging.getLogger(__name__)
from sdmetrics.reports.single_table._properties import DataValidity, Structure


class DiagnosticReport(BaseReport):
"""Single table diagnostic report.

This class creates a diagnostic report for single-table data. It calculates the diagnostic
score along three properties - Synthesis, Coverage, and Boundary.
score along two properties - Data Structure and Data Validity.
"""

def __init__(self):
super().__init__()
self._properties = {
'Coverage': Coverage(),
'Boundary': Boundary(),
'Synthesis': Synthesis()
'Data Validity': DataValidity(),
'Data Structure': Structure(),
}
self._results_handler = DiagnosticReportResultsHandler()

def _handle_results(self, verbose):
self._results_handler.print_results(self._properties, verbose)

def get_results(self):
"""Return the diagnostic results.

Returns:
dict
The diagnostic results.
"""
self._check_report_generated()
return deepcopy(self._results_handler.results)
15 changes: 0 additions & 15 deletions sdmetrics/reports/single_table/quality_report.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""Single table quality report."""
from sdmetrics.reports._results_handler import QualityReportResultsHandler
from sdmetrics.reports.base_report import BaseReport
from sdmetrics.reports.single_table._properties import ColumnPairTrends, ColumnShapes

Expand All @@ -17,17 +16,3 @@ def __init__(self):
'Column Shapes': ColumnShapes(),
'Column Pair Trends': ColumnPairTrends()
}
self._results_handler = QualityReportResultsHandler()

def _handle_results(self, verbose):
self._results_handler.print_results(self._properties, self._overall_score, verbose)

def get_score(self):
"""Return the overall quality score.

Returns:
float
The overall quality score.
"""
self._check_report_generated()
return self._overall_score
Loading