From ecdde400ca7ea8c1b7480082e72af3cfe33debe8 Mon Sep 17 00:00:00 2001 From: Ashwin Vaidya Date: Fri, 24 May 2024 13:52:48 +0200 Subject: [PATCH] Add data filter in tar extract (#2084) * add data filter in tar extract Signed-off-by: Ashwin Vaidya * add file exists check Signed-off-by: Ashwin Vaidya * cosmetics Signed-off-by: Ashwin Vaidya --------- Signed-off-by: Ashwin Vaidya --- src/anomalib/data/utils/download.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/anomalib/data/utils/download.py b/src/anomalib/data/utils/download.py index 558768b654..76761a0246 100644 --- a/src/anomalib/data/utils/download.py +++ b/src/anomalib/data/utils/download.py @@ -230,7 +230,9 @@ def safe_extract(tar_file: TarFile, root: Path, members: list[TarInfo]) -> None: """ for member in members: - tar_file.extract(member, root) + # check if the file already exists + if not (root / member.name).exists(): + tar_file.extract(member, root, filter="data") def generate_hash(file_path: str | Path, algorithm: str = "sha256") -> str: @@ -288,7 +290,7 @@ def extract(file_name: Path, root: Path) -> None: root (Path): Root directory where the dataset will be stored. """ - logger.info("Extracting dataset into root folder.") + logger.info(f"Extracting dataset into {root} folder.") # Safely extract zip files if file_name.suffix == ".zip":