Skip to content

Commit

Permalink
fix(starrynight): import bugs and update index module
Browse files Browse the repository at this point in the history
  • Loading branch information
leoank committed Jan 8, 2025
1 parent 40f7885 commit 2cf85cf
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 41 deletions.
3 changes: 3 additions & 0 deletions starrynight/src/starrynight/experiments/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ class Experiment(BaseModel, ABC):
data_production_contact: str | None = None
data_processing_contact: str | None = None

# Internal field: users should not access `init_config_` directly.
init_config_: BaseModel | None = None

@abstractstaticmethod
def from_index(index_path: Path, **kwargs: Unpack) -> Self:
"""Create experiment schema from index."""
Expand Down
4 changes: 2 additions & 2 deletions starrynight/src/starrynight/experiments/pcp_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ class PCPGeneric(Experiment):

sbs_config: SBSConfig
cp_config: CPConfig
path_parser: Callable[[str], MeasuredInventory] | None
# path_parser: Callable[[str], MeasuredInventory] | None

# Internal field: users should not access `init_config_` directly.
_init_config: PCPGenericInitConfig | None
init_config_: PCPGenericInitConfig | None = None

@staticmethod
def from_index(index_path: Path, init_config: PCPGenericInitConfig) -> Self:
Expand Down
99 changes: 63 additions & 36 deletions starrynight/src/starrynight/modules/gen_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
ExecFunction,
TypeAlgorithmFromCitation,
TypeCitations,
TypeEnum,
TypeInput,
TypeOutput,
)
from starrynight.schema import DataConfig

Expand Down Expand Up @@ -46,15 +49,15 @@ def create_work_unit_gen_index(out_dir: Path | CloudPath) -> list[UnitOfWork]:
return uow_list


def create_pipe_gen_index(
uid: str, inventory_path: Path | CloudPath, out_dir: Path | CloudPath
) -> Pipeline:
def create_pipe_gen_index(uid: str, spec: SpecContainer) -> Pipeline:
"""Create pipeline for gen index.
Parameters
----------
uid: str
Module unique id.
spec: SpecContainer
GenIndexModule specification.
inventory_path : Path | CloudPath
Inventory path. Can be local or cloud.
out_dir : Path | CloudPath
Expand All @@ -70,20 +73,18 @@ def create_pipe_gen_index(
[
Container(
name=uid,
input_paths={"inventory": [inventory_path.resolve().__str__()]},
output_paths={
"index": [out_dir.joinpath("index.parquet").resolve().__str__()]
},
input_paths={"inventory": [spec.inputs[0].path]},
output_paths={"index": [spec.outputs[0].path]},
config=ContainerConfig(
image="ghrc.io/leoank/starrynight:dev",
cmd=[
"starrynight",
"index",
"gen",
"-i",
inventory_path.resolve().__str__(),
spec.inputs[0].path,
"-o",
out_dir.resolve().__str__(),
Path(spec.outputs[0].path).parent.resolve().__str__(),
],
env={},
),
Expand All @@ -101,42 +102,68 @@ def uid() -> str:
"""Return module unique id."""
return "generate_index"

@staticmethod
def _spec() -> SpecContainer:
    """Return the module's default spec.

    The spec declares one input (the inventory) and one output (the
    generated index). Both ``path`` values are placeholders only:
    ``from_config`` overwrites ``inputs[0].path`` and ``outputs[0].path``
    with real locations when no spec is supplied by the caller.

    Returns
    -------
    SpecContainer
        Default specification for the index generation module.

    """
    return SpecContainer(
        inputs=[
            TypeInput(
                name="inventory_path",
                # NOTE(review): declared as ``TypeEnum.files`` although
                # ``from_config`` points this at a single parquet file,
                # while the output below uses ``TypeEnum.file`` - confirm
                # this is intended.
                type=TypeEnum.files,
                description="Path to the inventory.",
                optional=False,
                path="path/to/the/inventory",
            ),
        ],
        outputs=[
            TypeOutput(
                name="project_index",
                type=TypeEnum.file,
                description="Generated Index",
                optional=False,
                path="random/path/to/index.parquet",
            )
        ],
        parameters=[],
        display_only=[],
        results=[],
        # Execution details are left blank in the default spec.
        exec_function=ExecFunction(
            name="",
            script="",
            module="",
            cli_command="",
        ),
        docker_image=None,
        algorithm_folder_name=None,
        citations=TypeCitations(
            algorithm=[
                TypeAlgorithmFromCitation(
                    name="Starrynight indexing module",
                    description="This module generates an index for the dataset.",
                )
            ]
        ),
    )


@staticmethod
def from_config(
experiment: Experiment, data: DataConfig, spec: SpecContainer | None
) -> Self:
"""Create module from experiment and data config."""
if spec is None:
spec = SpecContainer(
inputs=[],
outputs=[],
parameters=[],
display_only=[],
results=[],
exec_function=ExecFunction(
name="",
script="",
module="",
cli_command="",
),
docker_image=None,
algorithm_folder_name=None,
citations=TypeCitations(
algorithm=[
TypeAlgorithmFromCitation(
name="Starrynight indexing module",
description="This module generates an index for the dataset.",
)
]
),
spec = GenIndexModule._spec()
spec.inputs[0].path = (
data.storage_path.joinpath("inventory/inventory.parquet")
.resolve()
.__str__()
)
spec.outputs[0].path = (
data.storage_path.joinpath("index/index.parquet").resolve().__str__()
)
pipe = create_pipe_gen_index(
uid=GenIndexModule.uid(),
inventory_path=data.dataset_path,
out_dir=data.storage_path.joinpath("inventory"),
)
uow = create_work_unit_gen_index(
out_dir=data.storage_path.joinpath("inventory")
spec=spec,
)
uow = create_work_unit_gen_index(out_dir=data.storage_path.joinpath("index"))

return GenIndexModule(spec=spec, pipe=pipe, uow=uow)
4 changes: 1 addition & 3 deletions starrynight/src/starrynight/modules/gen_inv.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,6 @@ def create_work_unit_gen_inv(out_dir: Path | CloudPath) -> list[UnitOfWork]:
def create_pipe_gen_inv(
uid: str,
spec: SpecContainer,
dataset_path: Path | CloudPath,
out_dir: Path | CloudPath,
) -> Pipeline:
"""Create pipeline for gen inv.
Expand Down Expand Up @@ -115,7 +113,7 @@ def uid() -> str:

@staticmethod
def _spec() -> SpecContainer:
"""Return module unique id."""
"""Return module default spec."""
return SpecContainer(
inputs=[
TypeInput(
Expand Down

0 comments on commit 2cf85cf

Please sign in to comment.