
Commit

[MAINTENANCE] Enforce mandatory docstrings for public API decorated objects (#10799)

Co-authored-by: Bill Dirks <[email protected]>
Co-authored-by: Kristen Lavavej <[email protected]>
3 people authored Jan 2, 2025
1 parent c1eb4a2 commit 735fac9
Showing 51 changed files with 348 additions and 63 deletions.
14 changes: 13 additions & 1 deletion great_expectations/_docs_decorators.py
@@ -40,6 +40,7 @@ class _PublicApiIntrospector:

# Only used for testing
_class_registry: dict[str, set[str]] = defaultdict(set)
_docstring_violations: set[str] = set()

# This is a special key that is used to indicate that a class definition
# is being added to the registry.
@@ -49,8 +50,14 @@ class _PublicApiIntrospector:
def class_registry(self) -> dict[str, set[str]]:
return self._class_registry

@property
def docstring_violations(self) -> set[str]:
return self._docstring_violations

def add(self, func: F) -> None:
self._add_to_docstring_violations(func)
self._add_to_class_registry(func)

try:
# We use an if statement instead of a ternary to work around
# mypy's inability to type narrow inside a ternary.
@@ -73,6 +80,11 @@ def add(self, func: F) -> None:
logger.exception(f"Could not add this function to the public API list: {func}")
raise

def _add_to_docstring_violations(self, func: F) -> None:
name = f"{func.__module__}.{func.__qualname__}"
if not func.__doc__ and name.startswith("great_expectations"):
self._docstring_violations.add(name)

def _add_to_class_registry(self, func: F) -> None:
if isinstance(func, type):
self._add_class_definition_to_registry(func)
@@ -135,7 +147,7 @@ def my_method(some_argument):
This tag is added at import time.
"""
public_api_introspector.add(func)
existing_docstring = func.__doc__ if func.__doc__ else ""
existing_docstring = func.__doc__ or ""
func.__doc__ = WHITELISTED_TAG + existing_docstring
return func

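For orientation, a hedged sketch of how this violation registry can back an enforcement test: importing the package runs every @public_api decorator at import time, so any undocumented public object lands in docstring_violations. The test name and file are illustrative assumptions, not part of this commit.

    # illustrative test module; the location and name are assumptions
    import great_expectations  # noqa: F401  (import triggers @public_api registration)

    from great_expectations._docs_decorators import public_api_introspector


    def test_public_api_objects_have_docstrings() -> None:
        # An empty set means every @public_api object under great_expectations has a docstring.
        assert public_api_introspector.docstring_violations == set()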
26 changes: 26 additions & 0 deletions great_expectations/checkpoint/checkpoint.py
@@ -295,6 +295,24 @@ def run(
expectation_parameters: SuiteParameterDict | None = None,
run_id: RunIdentifier | None = None,
) -> CheckpointResult:
"""
Runs the Checkpoint's underlying Validation Definitions and Actions.
Args:
batch_parameters: Parameters to be used when loading the Batch.
expectation_parameters: Parameters to be used when validating the Batch.
run_id: An optional unique identifier for the run.
Returns:
A CheckpointResult object containing the results of the run.
Raises:
CheckpointRunWithoutValidationDefinitionError: If the Checkpoint is run without any
Validation Definitions.
CheckpointNotAddedError: If the Checkpoint has not been added to the Store.
CheckpointNotFreshError: If the Checkpoint has been modified since it was last added
to the Store.
"""
if not self.validation_definitions:
raise CheckpointRunWithoutValidationDefinitionError()

@@ -438,6 +456,7 @@ def is_fresh(self) -> CheckpointFreshnessDiagnostics:

@public_api
def save(self) -> None:
"""Save the current state of this Checkpoint."""
store = project_manager.get_checkpoints_store()
key = store.get_key(name=self.name, id=self.id)

@@ -457,6 +476,13 @@ def _add_to_store(self) -> None:

@public_api
class CheckpointResult(BaseModel):
"""
The result of running a Checkpoint.
Contains information about Expectation successes and failures from running
each Validation Definition in the Checkpoint.
"""

run_id: RunIdentifier
run_results: Dict[ValidationResultIdentifier, ExpectationSuiteValidationResult]
checkpoint_config: Checkpoint
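A hedged usage sketch of the run() method documented above; the Checkpoint name and batch parameters are placeholders, and context.checkpoints.get is assumed from the factory API touched elsewhere in this commit.

    import great_expectations as gx

    context = gx.get_context()
    checkpoint = context.checkpoints.get("my_checkpoint")  # placeholder name

    # Raises CheckpointRunWithoutValidationDefinitionError if the Checkpoint
    # has no Validation Definitions attached.
    result = checkpoint.run(batch_parameters={"year": "2024"})  # placeholder parameters
    print(result.success)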
3 changes: 3 additions & 0 deletions great_expectations/core/batch_definition.py
@@ -51,6 +51,9 @@ class BatchDefinition(pydantic.GenericModel, Generic[PartitionerT]):
@property
@public_api
def data_asset(self) -> DataAsset[Any, PartitionerT]:
"""
The parent DataAsset for this Batch Definition.
"""
return self._data_asset

def set_data_asset(self, data_asset: DataAsset[Any, PartitionerT]) -> None:
4 changes: 4 additions & 0 deletions great_expectations/core/factory/checkpoint_factory.py
@@ -24,6 +24,10 @@

@public_api
class CheckpointFactory(Factory[Checkpoint]):
"""
Responsible for basic CRUD operations on a Data Context's Checkpoints.
"""

def __init__(self, store: CheckpointStore):
self._store = store

4 changes: 4 additions & 0 deletions great_expectations/core/factory/suite_factory.py
@@ -21,6 +21,10 @@

@public_api
class SuiteFactory(Factory[ExpectationSuite]):
"""
Responsible for basic CRUD operations on a Data Context's ExpectationSuites.
"""

def __init__(self, store: ExpectationsStore):
self._store = store

@@ -21,6 +21,10 @@

@public_api
class ValidationDefinitionFactory(Factory[ValidationDefinition]):
"""
Responsible for basic CRUD operations on a Data Context's ValidationDefinitions.
"""

def __init__(self, store: ValidationDefinitionStore) -> None:
self._store = store

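The three factories above share the same basic CRUD surface; a minimal add/get round trip is sketched below with a placeholder suite name (the other factories work analogously).

    import great_expectations as gx
    from great_expectations import ExpectationSuite

    context = gx.get_context()

    # Create and retrieve an ExpectationSuite through the SuiteFactory.
    suite = context.suites.add(ExpectationSuite(name="my_suite"))  # placeholder name
    same_suite = context.suites.get("my_suite")
    assert same_suite.name == suite.name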
7 changes: 7 additions & 0 deletions great_expectations/core/validation_definition.py
@@ -115,11 +115,17 @@ class Config:
@property
@public_api
def batch_definition(self) -> BatchDefinition:
"""
The Batch Definition to validate.
"""
return self.data

@property
@public_api
def asset(self) -> DataAsset:
"""
The parent Data Asset of the Batch Definition.
"""
return self.data.data_asset

@property
@@ -365,6 +371,7 @@ def identifier_bundle(self) -> _IdentifierBundle:

@public_api
def save(self) -> None:
"""Save the current state of this ValidationDefinition."""
store = project_manager.get_validation_definition_store()
key = store.get_key(name=self.name, id=self.id)

@@ -429,6 +429,9 @@ def datasource_store(self) -> DatasourceStore:
@property
@public_api
def suites(self) -> SuiteFactory:
"""
Responsible for basic CRUD operations on a context's ExpectationSuites.
"""
if not self._suites:
raise gx_exceptions.DataContextError( # noqa: TRY003
"DataContext requires a configured ExpectationsStore to persist ExpectationSuites."
@@ -438,6 +441,9 @@ def suites(self) -> SuiteFactory:
@property
@public_api
def checkpoints(self) -> CheckpointFactory:
"""
Responsible for basic CRUD operations on a context's Checkpoints.
"""
if not self._checkpoints:
raise gx_exceptions.DataContextError( # noqa: TRY003
"DataContext requires a configured CheckpointStore to persist Checkpoints."
@@ -447,6 +453,9 @@ def checkpoints(self) -> CheckpointFactory:
@property
@public_api
def validation_definitions(self) -> ValidationDefinitionFactory:
"""
Responsible for basic CRUD operations on a context's ValidationDefinitions.
"""
if not self._validation_definitions:
raise gx_exceptions.DataContextError( # noqa: TRY003
"DataContext requires a configured ValidationDefinitionStore to persist "
@@ -532,6 +541,9 @@ def checkpoint_store(self) -> CheckpointStore:
@property
@public_api
def data_sources(self) -> DataSourceManager:
"""
Responsible for basic CRUD operations on a context's DataSources.
"""
return self._data_sources

@property
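For context, a hedged sketch of how these context properties are typically reached; the Data Source name is a placeholder, add_pandas comes from the fluent datasource API rather than this diff, and all() is assumed to list the stored objects.

    import great_expectations as gx

    context = gx.get_context()

    # Each property documented above exposes CRUD-style access on the context.
    context.data_sources.add_pandas(name="example_pandas")  # placeholder name
    print(context.suites.all())
    print(context.checkpoints.all())
    print(context.validation_definitions.all())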
@@ -45,6 +45,16 @@ class DirectoryDataAsset(PathDataAsset[DatasourceT, ColumnPartitioner], Generic[

@public_api
def add_batch_definition_daily(self, name: str, column: str) -> BatchDefinition:
"""
Add a BatchDefinition, which creates a single Batch for each day in the directory.
Args:
name: Name of the Batch Definition.
column: Column to partition on.
Returns:
A BatchDefinition that is partitioned daily.
"""
# todo: test column
return self.add_batch_definition(
name=name,
@@ -55,6 +65,16 @@ def add_batch_definition_daily(self, name: str, column: str) -> BatchDefinition:

@public_api
def add_batch_definition_monthly(self, name: str, column: str) -> BatchDefinition:
"""
Add a BatchDefinition which creates a single batch for each month in the directory.
Args:
name: Name of the Batch Definition.
column: Column to partition on.
Returns:
A BatchDefinition that is partitioned monthly.
"""
# todo: test column
return self.add_batch_definition(
name=name,
@@ -65,6 +85,16 @@ def add_batch_definition_monthly(self, name: str, column: str) -> BatchDefinition:

@public_api
def add_batch_definition_yearly(self, name: str, column: str) -> BatchDefinition:
"""
Add a BatchDefinition which creates a single batch for each year in the directory.
Args:
name: Name of the Batch Definition.
column: Column to partition on.
Returns:
A BatchDefinition that is partitioned yearly.
"""
# todo: test column
return self.add_batch_definition(
name=name,
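A hedged sketch of the daily/monthly/yearly helpers added above; the Data Source, Asset, and column names are placeholders, and the asset is assumed to be an existing directory-style asset (for example a Spark directory CSV asset).

    import great_expectations as gx

    context = gx.get_context()
    data_source = context.data_sources.get("my_directory_datasource")  # placeholder names
    asset = data_source.get_asset("my_directory_asset")

    # One Batch per day, month, or year, partitioned on a date-like column.
    daily = asset.add_batch_definition_daily(name="daily", column="event_date")
    monthly = asset.add_batch_definition_monthly(name="monthly", column="event_date")
    yearly = asset.add_batch_definition_yearly(name="yearly", column="event_date")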
29 changes: 29 additions & 0 deletions great_expectations/datasource/fluent/interfaces.py
@@ -289,6 +289,13 @@ def _sorter_from_str(sort_key: str) -> Sorter:

@public_api
class DataAsset(GenericBaseModel, Generic[DatasourceT, PartitionerT], ABC):
"""
A Data Asset is a collection of records within a Data Source, which is usually named based
on the underlying data system and sliced to correspond to a desired specification.
Data Assets are used to specify how Great Expectations will organize data into Batches.
"""

# To subclass a DataAsset one must define `type` as a Class literal explicitly on the sublass
# as well as implementing the methods in the `Abstract Methods` section below.
# Some examples:
@@ -619,6 +626,11 @@ class Datasource(
Generic[_DataAssetT, _ExecutionEngineT],
metaclass=MetaDatasource,
):
"""
A Datasource provides a standard API for accessing and interacting with data from
a wide variety of source systems.
"""

# To subclass Datasource one needs to define:
# asset_types
# type
@@ -1154,6 +1166,23 @@ def validate(
result_format: ResultFormatUnion = DEFAULT_RESULT_FORMAT,
expectation_parameters: Optional[SuiteParameterDict] = None,
) -> ExpectationValidationResult | ExpectationSuiteValidationResult:
"""
Validate the Batch using the provided Expectation or Expectation Suite.
Args:
expect: The Expectation or Expectation Suite to validate.
result_format: The format to return the validation results in.
expectation_parameters: A dictionary of parameters values for any
expectations using parameterized values (the $PARAMETER syntax).
The keys are the parameter names and the values are the values
to be used for this validation run.
Returns:
An ExpectationValidationResult or ExpectationSuiteValidationResult object.
Raises:
ValueError: If the expect argument is not an Expectation or an ExpectationSuite.
"""
from great_expectations.core import ExpectationSuite
from great_expectations.expectations.expectation import Expectation

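A hedged sketch of the validate() call documented above; the Data Source, Asset, and Batch Definition names are placeholders, and ExpectColumnValuesToNotBeNull is just one example Expectation class.

    import great_expectations as gx
    import great_expectations.expectations as gxe

    context = gx.get_context()
    batch_definition = (
        context.data_sources.get("my_datasource")  # placeholder names
        .get_asset("my_asset")
        .get_batch_definition("my_batch_definition")
    )
    batch = batch_definition.get_batch()

    # Accepts a single Expectation or an ExpectationSuite; anything else raises ValueError.
    result = batch.validate(gxe.ExpectColumnValuesToNotBeNull(column="passenger_count"))
    print(result.success)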
@@ -35,6 +35,11 @@ class PandasAzureBlobStorageDatasourceError(PandasDatasourceError):

@public_api
class PandasAzureBlobStorageDatasource(_PandasFilePathDatasource):
"""
PandasAzureBlobStorageDatasource is a PandasDatasource that uses Azure Blob Storage as a
data store.
"""

# class attributes
data_connector_type: ClassVar[Type[AzureBlobStorageDataConnector]] = (
AzureBlobStorageDataConnector
13 changes: 13 additions & 0 deletions great_expectations/datasource/fluent/pandas_datasource.py
@@ -85,6 +85,10 @@ class PandasDatasourceError(Exception):

@public_api
class _PandasDataAsset(DataAsset):
"""
A Pandas DataAsset is a DataAsset that is backed by a Pandas DataFrame.
"""

_EXCLUDE_FROM_READER_OPTIONS: ClassVar[Set[str]] = {
"batch_definitions",
"batch_metadata",
@@ -211,6 +215,15 @@ def build_batch_request(

@public_api
def add_batch_definition_whole_dataframe(self, name: str) -> BatchDefinition:
"""
Add a BatchDefinition that requests the whole dataframe.
Args:
name: The name of the BatchDefinition.
Returns:
A BatchDefinition with no partitioning.
"""
return self.add_batch_definition(
name=name,
partitioner=None,
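A hedged end-to-end sketch for the whole-dataframe helper added above; the Data Source and Asset names are placeholders, and add_pandas / add_dataframe_asset are assumed from the fluent pandas API rather than shown in this diff.

    import pandas as pd

    import great_expectations as gx

    context = gx.get_context()
    data_source = context.data_sources.add_pandas(name="pandas")  # placeholder name
    asset = data_source.add_dataframe_asset(name="my_dataframe_asset")

    # The whole-dataframe Batch Definition has no partitioner; the dataframe
    # itself is supplied at runtime through batch_parameters.
    batch_definition = asset.add_batch_definition_whole_dataframe(name="whole_df")
    batch = batch_definition.get_batch(
        batch_parameters={"dataframe": pd.DataFrame({"x": [1, 2, 3]})}
    )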
@@ -31,6 +31,11 @@ class PandasGoogleCloudStorageDatasourceError(PandasDatasourceError):

@public_api
class PandasGoogleCloudStorageDatasource(_PandasFilePathDatasource):
"""
PandasGoogleCloudStorageDatasource is a PandasDatasource that uses Google Cloud Storage as a
data store.
"""

# class attributes
data_connector_type: ClassVar[Type[GoogleCloudStorageDataConnector]] = (
GoogleCloudStorageDataConnector
4 changes: 4 additions & 0 deletions great_expectations/datasource/fluent/pandas_s3_datasource.py
@@ -32,6 +32,10 @@ class PandasS3DatasourceError(PandasDatasourceError):

@public_api
class PandasS3Datasource(_PandasFilePathDatasource):
"""
PandasS3Datasource is a PandasDatasource that uses Amazon S3 as a data store.
"""

# class attributes
data_connector_type: ClassVar[Type[S3DataConnector]] = S3DataConnector
# these fields should not be passed to the execution engine
@@ -429,7 +429,7 @@
},
"TableAsset": {
"title": "TableAsset",
"description": "--Public API--",
"description": "--Public API--A class representing a table from a SQL database\n\nArgs:\n table_name: The name of the database table to be added\n schema_name: The name of the schema containing the database table to be added.",
"type": "object",
"properties": {
"name": {
@@ -486,7 +486,7 @@
},
"QueryAsset": {
"title": "QueryAsset",
"description": "--Public API--",
"description": "--Public API--An asset made from a SQL query\n\nArgs:\n query: The query to be used to construct the underlying Data Asset",
"type": "object",
"properties": {
"name": {
@@ -1,6 +1,6 @@
{
"title": "DatabricksTableAsset",
"description": "--Public API--",
"description": "--Public API--A class representing a table from a SQL database\n\nArgs:\n table_name: The name of the database table to be added\n schema_name: The name of the schema containing the database table to be added.",
"type": "object",
"properties": {
"name": {
@@ -1,6 +1,6 @@
{
"title": "QueryAsset",
"description": "--Public API--",
"description": "--Public API--An asset made from a SQL query\n\nArgs:\n query: The query to be used to construct the underlying Data Asset",
"type": "object",
"properties": {
"name": {