diff --git a/.github/workflows/gar-publish.yaml b/.github/workflows/gar-publish.yaml index a051f0c..3f228c1 100644 --- a/.github/workflows/gar-publish.yaml +++ b/.github/workflows/gar-publish.yaml @@ -5,7 +5,7 @@ name: Deploy single image to GAR (Google Artifact Registry) on: workflow_dispatch: release: - types: [created] + types: [published] env: PROJECT_ID: easyml-394818 GAR_LOCATION: us-central1 @@ -36,5 +36,5 @@ jobs: - name: 'Build Inventory Image' working-directory: . run: | - docker build --build-arg GITHUB_TOKEN=${{secrets.GHCR_PAT}} . --tag $GAR_LOCATION-docker.pkg.dev/$PROJECT_ID/internal/datadreamer:latest + docker build --build-arg GITHUB_TOKEN=${{secrets.GHCR_PAT}} . --tag $GAR_LOCATION-docker.pkg.dev/$PROJECT_ID/internal/datadreamer:latest --tag $GAR_LOCATION-docker.pkg.dev/$PROJECT_ID/internal/datadreamer:${{ github.event.release.tag_name }} docker push $GAR_LOCATION-docker.pkg.dev/$PROJECT_ID/internal/datadreamer --all-tags diff --git a/.github/workflows/ghcr-publish.yaml b/.github/workflows/ghcr-publish.yaml index ddd8347..9bedead 100644 --- a/.github/workflows/ghcr-publish.yaml +++ b/.github/workflows/ghcr-publish.yaml @@ -3,7 +3,7 @@ name: Docker Build and Publish on: workflow_dispatch: release: - types: [created] + types: [published] jobs: build-and-push: diff --git a/.gitignore b/.gitignore index 7759c34..dac496a 100644 --- a/.gitignore +++ b/.gitignore @@ -153,5 +153,6 @@ Thumbs.db # Others node_modules/ **generated_dataset*/ +**gen_dataset*/ **runs/ **wandb/ diff --git a/README.md b/README.md index 821f7ae..f15831f 100644 --- a/README.md +++ b/README.md @@ -138,6 +138,12 @@ Run the following command in your terminal to use the script: datadreamer --save_dir --class_names --prompts_number [additional options] ``` +or using a `.yaml` config file + +```bash +datadreamer --config +``` + ### 🎯 Main Parameters @@ -152,6 +158,8 @@ datadreamer --save_dir --class_names --prompts_number --class_names --prompts_number @@ -196,8 +205,16 @@ datadreamer --save_dir --class_names --prompts_number ### 📦 Output diff --git a/configs/cls_config.yaml b/configs/cls_config.yaml new file mode 100644 index 0000000..17f028c --- /dev/null +++ b/configs/cls_config.yaml @@ -0,0 +1,7 @@ +class_names: [person, moon, robot] +prompts_number: 20 +prompt_generator: simple +num_objects_range: [1, 3] +image_generator: sdxl-turbo +task: classification +image_annotator: clip \ No newline at end of file diff --git a/configs/det_config.yaml b/configs/det_config.yaml new file mode 100644 index 0000000..42a107e --- /dev/null +++ b/configs/det_config.yaml @@ -0,0 +1,6 @@ +class_names: [person, moon, robot] +prompts_number: 20 +prompt_generator: simple +num_objects_range: [1, 3] +image_generator: sdxl-turbo +task: detection \ No newline at end of file diff --git a/datadreamer/pipelines/generate_dataset_from_scratch.py b/datadreamer/pipelines/generate_dataset_from_scratch.py index f1d3ee2..cc5750e 100644 --- a/datadreamer/pipelines/generate_dataset_from_scratch.py +++ b/datadreamer/pipelines/generate_dataset_from_scratch.py @@ -1,13 +1,15 @@ from __future__ import annotations import argparse -import json import os +import shutil +import uuid import matplotlib.patches as patches import matplotlib.pyplot as plt import numpy as np import torch +from box import Box from PIL import Image from tqdm import tqdm @@ -24,6 +26,8 @@ TinyLlamaLMPromptGenerator, WordNetSynonymGenerator, ) +from datadreamer.utils import Config, convert_dataset +from datadreamer.utils.dataset_utils import save_annotations_to_json 
prompt_generators = { "simple": SimplePromptGenerator, @@ -52,14 +56,12 @@ def parse_args(): parser.add_argument( "--save_dir", type=str, - default="generated_dataset", help="Directory to save generated images and annotations", ) parser.add_argument( "--task", type=str, - default="detection", choices=["detection", "classification"], help="Task to generate data for", ) @@ -68,54 +70,64 @@ def parse_args(): "--class_names", type=str, nargs="+", - default=["bear", "bicycle", "bird", "person"], help="List of object names for prompt generation", ) parser.add_argument( "--annotate_only", action="store_true", + default=None, help="Only annotate the images without generating new ones, prompt and image generator will be skipped.", ) parser.add_argument( - "--prompts_number", type=int, default=10, help="Number of prompts to generate" + "--prompts_number", + type=int, + help="Number of prompts to generate", ) parser.add_argument( "--num_objects_range", type=int, nargs="+", - default=[1, 3], help="Range of number of objects in a prompt", ) parser.add_argument( "--prompt_generator", type=str, - default="simple", choices=["simple", "lm", "tiny"], help="Prompt generator to use: simple or language model", ) parser.add_argument( "--image_generator", type=str, - default="sdxl-turbo", choices=["sdxl", "sdxl-turbo", "sdxl-lightning"], help="Image generator to use", ) parser.add_argument( "--image_annotator", type=str, - default="owlv2", choices=["owlv2", "clip"], help="Image annotator to use", ) + parser.add_argument( + "--dataset_format", + type=str, + choices=["raw", "yolo", "coco", "luxonis-dataset", "cls-single"], + help="Dataset format to use", + ) + parser.add_argument( + "--split_ratios", + type=float, + nargs="+", + help="Train-validation-test split ratios (default: 0.8, 0.1, 0.1).", + ) + parser.add_argument( "--synonym_generator", type=str, - default="none", choices=["none", "llm", "wordnet"], help="Image annotator to use", ) @@ -123,48 +135,43 @@ def parse_args(): parser.add_argument( "--negative_prompt", type=str, - default="cartoon, blue skin, painting, scrispture, golden, illustration, worst quality, low quality, normal quality:2, unrealistic dream, low resolution, static, sd character, low quality, low resolution, greyscale, monochrome, nose, cropped, lowres, jpeg artifacts, deformed iris, deformed pupils, bad eyes, semi-realistic worst quality, bad lips, deformed mouth, deformed face, deformed fingers, bad anatomy", help="Negative prompt to guide the generation away from certain features", ) parser.add_argument( "--prompt_suffix", type=str, - default=", hd, 8k, highly detailed", help="Suffix to add to every image generation prompt, e.g., for adding details like resolution", ) parser.add_argument( "--prompt_prefix", type=str, - default="", help="Prefix to add to every image generation prompt", ) parser.add_argument( "--conf_threshold", type=float, - default=0.15, help="Confidence threshold for annotation", ) parser.add_argument( "--annotation_iou_threshold", type=float, - default=0.2, help="Intersection over Union (IoU) threshold for annotation", ) parser.add_argument( "--use_tta", - default=False, + default=None, action="store_true", help="Whether to use test time augmentation for object detection", ) parser.add_argument( "--use_image_tester", - default=False, + default=None, action="store_true", help="Whether to use image tester for image generation", ) @@ -172,14 +179,12 @@ def parse_args(): parser.add_argument( "--image_tester_patience", type=int, - default=1, help="Patience for image 
tester", ) parser.add_argument( "--lm_quantization", type=str, - default="none", choices=["none", "4bit"], help="Quantization to use for Mistral language model", ) @@ -187,7 +192,6 @@ def parse_args(): parser.add_argument( "--annotator_size", type=str, - default="base", choices=["base", "large"], help="Size of the annotator model to use", ) @@ -195,34 +199,38 @@ def parse_args(): parser.add_argument( "--batch_size_prompt", type=int, - default=64, help="Batch size for prompt generation", ) parser.add_argument( "--batch_size_annotation", type=int, - default=1, help="Batch size for annotation", ) parser.add_argument( "--batch_size_image", type=int, - default=1, help="Batch size for image generation", ) parser.add_argument( "--device", type=str, - default="cuda", choices=["cuda", "cpu"], help="Device to use", ) parser.add_argument( - "--seed", type=int, default=42, help="Random seed for image generation" + "--config", + type=str, + help="Path to the configuration file", + ) + + parser.add_argument( + "--seed", + type=int, + help="Random seed for image generation", ) return parser.parse_args() @@ -242,9 +250,6 @@ def check_args(args): ): raise ValueError("--class_names must be a non-empty list of strings") - if args.annotate_only and not args.task == "detection": - raise ValueError("--annotate_only can only be used with --task=detection") - # Check prompts_number if args.prompts_number <= 0: raise ValueError("--prompts_number must be a positive integer") @@ -319,61 +324,51 @@ def check_args(args): "--image_annotator must be one of the available annotators for classification task" ) + # Check coorect task and dataset_format + if args.task == "classification" and args.dataset_format in ["coco", "yolo"]: + raise ValueError( + "--dataset_format must be one of the available dataset formats for classification task: raw, cls-single, luxonis-dataset" + ) + + if args.task == "detection" and args.dataset_format in ["cls-single"]: + raise ValueError( + "--dataset_format must be one of the available dataset formats for detection task: raw, coco, yolo, luxonis-dataset" + ) -def save_det_annotations_to_json( - image_paths, - boxes_list, - labels_list, - class_names, - save_dir, - file_name="annotations.json", -): - annotations = {} - for image_path, bboxes, labels in zip(image_paths, boxes_list, labels_list): - image_name = os.path.basename(image_path) - annotations[image_name] = { - "boxes": bboxes.tolist(), - "labels": labels.tolist(), - } - annotations["class_names"] = class_names - - # Save to JSON file - with open(os.path.join(save_dir, file_name), "w") as f: - json.dump(annotations, f, indent=4) - - -def save_clf_annotations_to_json( - image_paths, labels_list, class_names, save_dir, file_name="annotations.json" -): - annotations = {} - for image_path, labels in zip(image_paths, labels_list): - image_name = os.path.basename(image_path) - annotations[image_name] = { - "labels": labels.tolist(), - } - annotations["class_names"] = class_names - - # Save to JSON file - with open(os.path.join(save_dir, file_name), "w") as f: - json.dump(annotations, f, indent=4) + # Check split_ratios + if ( + len(args.split_ratios) != 3 + or not all(0 <= ratio <= 1 for ratio in args.split_ratios) + or sum(args.split_ratios) != 1 + ): + raise ValueError( + "--split_ratios must be a list of three floats that sum up to 1" + ) def main(): args = parse_args() + # Get the None args without the config + args_dict = {k: v for k, v in vars(args).items() if k != "config" and v is not None} + config = Config.get_config(args.config, 
args_dict) + args = Box(config.model_dump(exclude_none=True, by_alias=True)) + # Check arguments check_args(args) + # Directories for saving images and bboxes save_dir = args.save_dir + if not args.annotate_only: + if os.path.exists(save_dir): + shutil.rmtree(save_dir) + os.makedirs(save_dir) - # Directories for saving images and bboxes bbox_dir = os.path.join(save_dir, "bboxes_visualization") - if not os.path.exists(save_dir): - os.makedirs(save_dir) - if not os.path.exists(bbox_dir): - os.makedirs(bbox_dir) + if os.path.exists(bbox_dir): + shutil.rmtree(bbox_dir) + os.makedirs(bbox_dir) # Save arguments - with open(os.path.join(save_dir, "generation_args.json"), "w") as f: - json.dump(vars(args), f, indent=4) + config.save_data(os.path.join(save_dir, "generation_args.yaml")) generated_prompts = None image_paths = [] @@ -417,7 +412,9 @@ def main(): prompts, prompt_objects ): for generated_image in generated_images_batch: - image_path = os.path.join(save_dir, f"image_{num_generated_images}.jpg") + unique_id = uuid.uuid4().hex + unique_filename = f"image_{num_generated_images}_{unique_id}.jpg" + image_path = os.path.join(save_dir, unique_filename) generated_image.save(image_path) image_paths.append(image_path) num_generated_images += 1 @@ -442,12 +439,15 @@ def main(): synonym_dict, os.path.join(save_dir, "synonyms.json") ) + boxes_list = [] + scores_list = [] + labels_list = [] + if args.task == "classification": # Classification annotation annotator_class = clf_annotators[args.image_annotator] annotator = annotator_class(device=args.device, size=args.annotator_size) - labels_list = [] # Split image_paths into batches image_batches = [ image_paths[i : i + args.batch_size_annotation] @@ -468,24 +468,32 @@ def main(): ) labels_list.extend(batch_labels) - save_clf_annotations_to_json( - image_paths, labels_list, args.class_names, save_dir + save_annotations_to_json( + image_paths=image_paths, + labels_list=labels_list, + class_names=args.class_names, + save_dir=save_dir, ) + + if args.dataset_format == "cls-single": + convert_dataset.convert_dataset( + args.save_dir, + args.save_dir, + "cls-single", + args.split_ratios, + copy_files=False, + seed=args.seed, + ) else: # Annotation annotator_class = det_annotators[args.image_annotator] annotator = annotator_class(device=args.device, size=args.annotator_size) - boxes_list = [] - scores_list = [] - labels_list = [] - # Split image_paths into batches image_batches = [ image_paths[i : i + args.batch_size_annotation] for i in range(0, len(image_paths), args.batch_size_annotation) ] - for i, image_batch in tqdm( enumerate(image_batches), desc="Annotating images", @@ -546,8 +554,44 @@ def main(): plt.close() # Save annotations as JSON files - save_det_annotations_to_json( - image_paths, boxes_list, labels_list, args.class_names, save_dir + save_annotations_to_json( + image_paths=image_paths, + labels_list=labels_list, + boxes_list=boxes_list, + class_names=args.class_names, + save_dir=save_dir, + ) + + if args.dataset_format == "yolo": + # Convert annotations to YOLO format + convert_dataset.convert_dataset( + args.save_dir, + args.save_dir, + "yolo", + args.split_ratios, + copy_files=False, + seed=args.seed, + ) + # Convert annotations to COCO format + elif args.dataset_format == "coco": + convert_dataset.convert_dataset( + args.save_dir, + args.save_dir, + "coco", + args.split_ratios, + copy_files=False, + seed=args.seed, + ) + + # Convert annotations to LuxonisDataset format + if args.dataset_format == "luxonis-dataset": + 
convert_dataset.convert_dataset( + args.save_dir, + args.save_dir, + "luxonis-dataset", + args.split_ratios, + copy_files=False, + seed=args.seed, ) diff --git a/datadreamer/utils/__init__.py b/datadreamer/utils/__init__.py index 9d48db4..a055737 100644 --- a/datadreamer/utils/__init__.py +++ b/datadreamer/utils/__init__.py @@ -1 +1,17 @@ from __future__ import annotations + +from .base_converter import BaseConverter +from .coco_converter import COCOConverter +from .config import Config +from .luxonis_dataset_converter import LuxonisDatasetConverter +from .single_label_cls_converter import SingleLabelClsConverter +from .yolo_converter import YOLOConverter + +__all__ = [ + "BaseConverter", + "COCOConverter", + "LuxonisDatasetConverter", + "YOLOConverter", + "SingleLabelClsConverter", + "Config", +] diff --git a/datadreamer/utils/base_converter.py b/datadreamer/utils/base_converter.py new file mode 100644 index 0000000..3d97199 --- /dev/null +++ b/datadreamer/utils/base_converter.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import json +from abc import ABC, abstractmethod + +import numpy as np + + +class BaseConverter(ABC): + """Abstract base class for converter.""" + + def __init__(self, seed=42): + np.random.seed(seed) + + @abstractmethod + def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): + """Converts a dataset into another format. + + Args: + - dataset_dir (str): The directory where the source dataset is located. + - output_dir (str): The directory where the processed dataset should be saved. + - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + + + No return value. + """ + pass + + @staticmethod + def read_annotations(annotation_path): + """Reads annotations from a JSON file located at the specified path. + + Args: + - annotation_path (str): The path to the JSON file containing annotations. + + Returns: + - dict: A dictionary containing the data loaded from the JSON file. + """ + with open(annotation_path) as f: + data = json.load(f) + return data + + @staticmethod + def make_splits(images, split_ratios, shuffle=True): + """Splits the list of images into training, validation, and test sets. + + Args: + - images (list of str): A list of image paths. + - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + - shuffle (bool, optional): Whether to shuffle the list of images. Defaults to True. + + Returns: + - list of str: A list of image paths for the training set. + - list of str: A list of image paths for the validation set. + - list of str: A list of image paths for the test set. 
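+
+        Example (illustrative sketch; the image names below are hypothetical):
+            >>> train, val, test = BaseConverter.make_splits(
+            ...     ["0.jpg", "1.jpg", "2.jpg", "3.jpg"], [0.5, 0.25, 0.25], shuffle=False
+            ... )
+            >>> len(train), len(val), len(test)
+            (2, 1, 1)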
+ """ + if shuffle: + np.random.shuffle(images) + + train_images = images[: int(len(images) * split_ratios[0])] + val_images = images[ + int(len(images) * split_ratios[0]) : int( + len(images) * (split_ratios[0] + split_ratios[1]) + ) + ] + test_images = images[int(len(images) * (split_ratios[0] + split_ratios[1])) :] + + return train_images, val_images, test_images diff --git a/datadreamer/utils/coco_converter.py b/datadreamer/utils/coco_converter.py new file mode 100644 index 0000000..ba02d97 --- /dev/null +++ b/datadreamer/utils/coco_converter.py @@ -0,0 +1,151 @@ +from __future__ import annotations + +import json +import os +import shutil + +from PIL import Image + +from datadreamer.utils.base_converter import BaseConverter + + +class COCOConverter(BaseConverter): + """Class for converting a dataset to COCO format. + + Format: + + dataset_dir + ├── train + │ ├── data + │ │ ├── 0.jpg + │ │ ├── 1.jpg + │ ├── labels.json + ├── validation + │ ├── data + │ ├── labels.json + ├── test + │ ├── data + │ ├── labels.json + """ + + def __init__(self, seed=42): + super().__init__(seed) + + def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): + """Converts a dataset into a COCO format. + + Args: + - dataset_dir (str): The directory where the source dataset is located. + - output_dir (str): The directory where the processed dataset should be saved. + - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + + No return value. + """ + annotation_path = os.path.join(dataset_dir, "annotations.json") + data = BaseConverter.read_annotations(annotation_path) + self.process_data(data, dataset_dir, output_dir, split_ratios, copy_files) + + def process_data(self, data, image_dir, output_dir, split_ratios, copy_files=True): + """Processes the data by dividing it into training and validation sets, and + saves the images and labels in COCO format. + + Args: + - data (dict): The dictionary containing image annotations. + - image_dir (str): The directory where the source images are located. + - output_dir (str): The base directory where the processed data will be saved. + - split_ratios (float): The ratio to split the data into training, validation, and test sets. + - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + + + No return value. 
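+
+        Expected ``data`` layout (illustrative, mirroring annotations.json):
+            {
+                "class_names": ["cat", "dog"],
+                "0.jpg": {"boxes": [[10, 10, 50, 50]], "labels": [0]},
+            }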
+ """ + images = list(data.keys()) + images.remove("class_names") + + train_images, val_images, test_images = BaseConverter.make_splits( + images, split_ratios + ) + + for dataset_type, image_set in [ + ("train", train_images), + ("validation", val_images), + ("test", test_images), + ]: + dataset_output_dir = os.path.join(output_dir, dataset_type) + data_output_dir = os.path.join(dataset_output_dir, "data") + + if os.path.exists(data_output_dir): + shutil.rmtree(data_output_dir) + + os.makedirs(data_output_dir) + + images_info = [] + annotations = [] + annotation_id = 0 + + for image_name in image_set: + image_full_path = os.path.join(image_dir, image_name) + annotation = data[image_name] + image = Image.open(image_full_path) + image_width, image_height = image.size + + images_info.append( + { + "id": len(images_info) + 1, + "file_name": image_name, + "width": image_width, + "height": image_height, + } + ) + + for box, label in zip(annotation["boxes"], annotation["labels"]): + annotations.append( + { + "id": annotation_id, + "image_id": len(images_info), + "category_id": label, + "bbox": [box[0], box[1], box[2] - box[0], box[3] - box[1]], + "segmentation": None, # [[box[0], box[1], box[2], box[1], box[2], box[3], box[0], box[3]]], # bbox mask + "area": (box[2] - box[0]) * (box[3] - box[1]), + "iscrowd": 0, + } + ) + annotation_id += 1 + + if copy_files: + shutil.copy( + image_full_path, os.path.join(data_output_dir, image_name) + ) + else: + shutil.move( + image_full_path, os.path.join(data_output_dir, image_name) + ) + + self.save_labels( + dataset_output_dir, images_info, annotations, data["class_names"] + ) + + def save_labels(self, dataset_output_dir, images_info, annotations, class_names): + """Saves the labels to a JSON file. + + Args: + - dataset_output_dir (str): The directory where the labels should be saved. + - images_info (list of dict): A list of dictionaries containing image information. + - annotations (list of dict): A list of dictionaries containing annotation information. + - class_names (list of str): A list of class names. + + No return value. 
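+
+        Resulting ``labels.json`` (illustrative, abridged to a single image):
+            {
+                "images": [{"id": 1, "file_name": "0.jpg", "width": 100, "height": 100}],
+                "annotations": [{"id": 0, "image_id": 1, "category_id": 0,
+                                 "bbox": [10, 10, 40, 40], "area": 1600, "iscrowd": 0}],
+                "categories": [{"id": 0, "name": "cat"}, {"id": 1, "name": "dog"}]
+            }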
+ """ + + with open(os.path.join(dataset_output_dir, "labels.json"), "w") as f: + json.dump( + { + "images": images_info, + "annotations": annotations, + "categories": [ + {"id": i, "name": name} for i, name in enumerate(class_names) + ], + }, + f, + ) diff --git a/datadreamer/utils/config.py b/datadreamer/utils/config.py new file mode 100644 index 0000000..2e87832 --- /dev/null +++ b/datadreamer/utils/config.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from typing import Annotated, List, Literal + +from luxonis_ml.utils import LuxonisConfig +from pydantic import Field + + +class Config(LuxonisConfig): + # General arguments + save_dir: str = "generated_dataset" + class_names: List[str] = ["bear", "bicycle", "bird", "person"] + prompts_number: int = 10 + task: Literal["detection", "classification"] = "detection" + seed: int = 42 + device: Literal["cuda", "cpu"] = "cuda" + annotate_only: bool = False + dataset_format: Literal[ + "raw", "yolo", "coco", "luxonis-dataset", "cls-single" + ] = "raw" + split_ratios: Annotated[ + List[float], Field(default=[0.8, 0.1, 0.1], min_length=3, max_length=3) + ] = [0.8, 0.1, 0.1] + # Prompt generation arguments + prompt_generator: Literal["simple", "lm", "tiny"] = "simple" + synonym_generator: Literal["none", "llm", "wordnet"] = "none" + num_objects_range: Annotated[ + List[int], Field(default=[1, 3], min_length=2, max_length=2) + ] = [1, 3] + lm_quantization: Literal["none", "4bit"] = "none" + batch_size_prompt: int = 64 + # Image generation arguments + image_generator: Literal["sdxl", "sdxl-turbo", "sdxl-lightning"] = "sdxl-turbo" + prompt_prefix: str = "" + prompt_suffix: str = ", hd, 8k, highly detailed" + negative_prompt: str = "cartoon, blue skin, painting, scrispture, golden, illustration, worst quality, low quality, normal quality:2, unrealistic dream, low resolution, static, sd character, low quality, low resolution, greyscale, monochrome, nose, cropped, lowres, jpeg artifacts, deformed iris, deformed pupils, bad eyes, semi-realistic worst quality, bad lips, deformed mouth, deformed face, deformed fingers, bad anatomy" + batch_size_image: int = 1 + use_image_tester: bool = False + image_tester_patience: int = 1 + # Annotation arguments + image_annotator: Literal["owlv2", "clip"] = "owlv2" + conf_threshold: float = 0.15 + annotation_iou_threshold: float = 0.2 + use_tta: bool = False + annotator_size: Literal["base", "large"] = "base" + batch_size_annotation: int = 1 diff --git a/datadreamer/utils/convert_dataset.py b/datadreamer/utils/convert_dataset.py new file mode 100644 index 0000000..7b028e4 --- /dev/null +++ b/datadreamer/utils/convert_dataset.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +import argparse + +from datadreamer.utils import ( + COCOConverter, + LuxonisDatasetConverter, + SingleLabelClsConverter, + YOLOConverter, +) + + +def convert_dataset( + input_dir, output_dir, dataset_format, split_ratios, copy_files=True, seed=42 +): + if dataset_format == "yolo": + converter = YOLOConverter(seed=seed) + elif dataset_format == "coco": + converter = COCOConverter(seed=seed) + elif dataset_format == "luxonis-dataset": + converter = LuxonisDatasetConverter(seed=seed) + elif dataset_format == "cls-single": + converter = SingleLabelClsConverter(seed=seed) + else: + raise ValueError(f"Invalid dataset format: {dataset_format}") + + converter.convert(input_dir, output_dir, split_ratios, copy_files) + + +def main(): + parser = argparse.ArgumentParser( + description="Convert raw dataset to another format with train-val-test 
split." + ) + parser.add_argument( + "--input_dir", type=str, help="Directory containing the images and annotations." + ) + parser.add_argument( + "--output_dir", + type=str, + help="Directory where the processed dataset will be saved.", + ) + parser.add_argument( + "--dataset_format", + type=str, + default="yolo", + choices=["yolo", "coco", "luxonis-dataset", "cls-single"], + ) + parser.add_argument( + "--split_ratios", + type=float, + nargs="+", + default=[0.8, 0.1, 0.1], + help="Train-validation-test split ratios (default: 0.8, 0.1, 0.1).", + ) + parser.add_argument( + "--copy_files", + type=bool, + default=True, + help="Copy files to output directory, otherwise move them.", + ) + parser.add_argument( + "--seed", + type=int, + default=42, + help="Random seed for reproducibility.", + ) + + args = parser.parse_args() + + convert_dataset( + args.input_dir, + args.output_dir, + args.dataset_format, + args.split_ratios, + args.copy_files, + ) + + +if __name__ == "__main__": + main() diff --git a/datadreamer/utils/convert_dataset_to_yolo.py b/datadreamer/utils/convert_dataset_to_yolo.py deleted file mode 100644 index 83a985e..0000000 --- a/datadreamer/utils/convert_dataset_to_yolo.py +++ /dev/null @@ -1,157 +0,0 @@ -from __future__ import annotations - -import argparse -import json -import os -import shutil - -import numpy as np -from PIL import Image - - -def read_annotations(annotation_path): - """Reads annotations from a JSON file located at the specified path. - - Args: - - annotation_path (str): The path to the JSON file containing annotations. - - Returns: - - dict: A dictionary containing the data loaded from the JSON file. - """ - with open(annotation_path) as f: - data = json.load(f) - return data - - -def convert_to_yolo_format(box, image_width, image_height): - """Converts bounding box coordinates to YOLO format. - - Args: - - box (list of float): A list containing the bounding box coordinates [x_min, y_min, x_max, y_max]. - - image_width (int): The width of the image. - - image_height (int): The height of the image. - - Returns: - - list of float: A list containing the bounding box in YOLO format [x_center, y_center, width, height]. - """ - x_center = (box[0] + box[2]) / 2 / image_width - y_center = (box[1] + box[3]) / 2 / image_height - width = (box[2] - box[0]) / image_width - height = (box[3] - box[1]) / image_height - return [x_center, y_center, width, height] - - -def process_data(data, image_dir, output_dir, split_ratio): - """Processes the data by dividing it into training and validation sets, and saves - the images and labels in YOLO format. - - Args: - - data (dict): The dictionary containing image annotations. - - image_dir (str): The directory where the source images are located. - - output_dir (str): The base directory where the processed data will be saved. - - split_ratio (float): The ratio to split the data into training and validation sets. - - No return value. 
- """ - images = list(data.keys()) - np.random.shuffle(images) - - split_index = int(len(images) * split_ratio) - train_images = images[:split_index] - val_images = images[split_index:] - - for dataset_type, image_set in [("train", train_images), ("val", val_images)]: - image_output_dir = os.path.join(output_dir, dataset_type, "images") - label_output_dir = os.path.join(output_dir, dataset_type, "labels") - - # If the output directories already exist, replace them - if os.path.exists(image_output_dir): - shutil.rmtree(image_output_dir) - if os.path.exists(label_output_dir): - shutil.rmtree(label_output_dir) - - os.makedirs(image_output_dir) - os.makedirs(label_output_dir) - - for image_name in image_set: - if image_name == "class_names": - continue - # extract image name from image path - image_full_path = os.path.join(image_dir, image_name) - annotation = data[image_name] - image = Image.open(image_full_path) - image_width, image_height = image.size - - label_file = os.path.join( - label_output_dir, os.path.splitext(image_name)[0] + ".txt" - ) - with open(label_file, "w") as f: - for box, label in zip(annotation["boxes"], annotation["labels"]): - yolo_box = convert_to_yolo_format(box, image_width, image_height) - f.write(f"{label} {' '.join(map(str, yolo_box))}\n") - - shutil.copy(image_full_path, os.path.join(image_output_dir, image_name)) - - -def create_data_yaml(root_dir, class_names): - """Creates a YAML file for dataset configuration, specifying paths and class names. - - Args: - - root_dir (str): The root directory where the dataset is located. - - class_names (list of str): A list of class names. - - No return value. - """ - yaml_content = ( - f"train: {os.path.abspath(os.path.join(root_dir, 'train'))}\n" - f"val: {os.path.abspath(os.path.join(root_dir, 'val'))}\n" - f"nc: {len(class_names)}\n" - f"names: {class_names}" - ) - with open(os.path.join(root_dir, "data.yaml"), "w") as f: - f.write(yaml_content) - - -def convert(dataset_dir, output_dir, train_val_split_ratio): - """Converts a dataset into a format suitable for training with YOLO, including - creating training and validation splits. - - Args: - - dataset_dir (str): The directory where the source dataset is located. - - output_dir (str): The directory where the processed dataset should be saved. - - train_val_split_ratio (float): The ratio to split the dataset into training and validation sets. - - No return value. - """ - annotation_path = os.path.join(dataset_dir, "annotations.json") - data = read_annotations(annotation_path) - process_data(data, dataset_dir, output_dir, train_val_split_ratio) - create_data_yaml(output_dir, data["class_names"]) - - -def main(): - parser = argparse.ArgumentParser( - description="Convert dataset to YOLO format with train-val split." - ) - parser.add_argument( - "--save_dir", type=str, help="Directory containing the images and annotations." 
- ) - parser.add_argument( - "--output_dir", - type=str, - help="Directory where the processed dataset will be saved.", - ) - parser.add_argument( - "--split_ratio", - type=float, - default=0.8, - help="Train-validation split ratio (default: 0.8)", - ) - - args = parser.parse_args() - - convert(args.save_dir, args.output_dir, args.split_ratio) - - -if __name__ == "__main__": - main() diff --git a/datadreamer/utils/dataset_utils.py b/datadreamer/utils/dataset_utils.py new file mode 100644 index 0000000..a396ae0 --- /dev/null +++ b/datadreamer/utils/dataset_utils.py @@ -0,0 +1,30 @@ +import json +import os + + +def save_annotations_to_json( + image_paths, + labels_list, + boxes_list=None, + class_names=None, + save_dir=None, + file_name="annotations.json", +): + annotations = {} + for i in range(len(image_paths)): + # for image_path, bboxes, labels in zip(image_paths, boxes_list, labels_list): + image_name = os.path.basename(image_paths[i]) + # image_name = os.path.basename(image_path) + labels = labels_list[i] + annotations[image_name] = { + "labels": labels.tolist(), + } + if boxes_list is not None: + bboxes = boxes_list[i] + annotations[image_name]["boxes"] = bboxes.tolist() + + annotations["class_names"] = class_names + + # Save to JSON file + with open(os.path.join(save_dir, file_name), "w") as f: + json.dump(annotations, f, indent=4) diff --git a/datadreamer/utils/luxonis_dataset_converter.py b/datadreamer/utils/luxonis_dataset_converter.py new file mode 100644 index 0000000..d9cb5a5 --- /dev/null +++ b/datadreamer/utils/luxonis_dataset_converter.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +import os + +from luxonis_ml.data import LuxonisDataset +from luxonis_ml.data.utils.enums import BucketStorage +from PIL import Image + +from datadreamer.utils import BaseConverter + + +class LuxonisDatasetConverter(BaseConverter): + """Class for converting a dataset to LuxonisDataset format.""" + + def __init__(self, seed=42): + super().__init__(seed) + + def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): + """Converts a dataset into a LuxonisDataset format. + + Args: + - dataset_dir (str): The directory where the source dataset is located. + - output_dir (str): The directory where the processed dataset should be saved. + - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + + No return value. 
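+
+        Example (illustrative sketch; directory names are hypothetical):
+            >>> converter = LuxonisDatasetConverter(seed=42)
+            >>> converter.convert("generated_dataset", "my_luxonis_dataset", [0.8, 0.1, 0.1])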
+        """
+        annotation_path = os.path.join(dataset_dir, "annotations.json")
+        data = BaseConverter.read_annotations(annotation_path)
+        self.process_data(data, dataset_dir, output_dir, split_ratios)
+
+    def process_data(self, data, dataset_dir, output_dir, split_ratios):
+        class_names = data["class_names"]
+        image_paths = list(data.keys())
+        image_paths.remove("class_names")
+
+        def dataset_generator():
+            # Iterate over the images and yield classification and box annotations
+
+            for image_path in image_paths:
+                image_full_path = os.path.join(dataset_dir, image_path)
+                width, height = Image.open(image_full_path).size
+                labels = data[image_path]["labels"]
+                for label in labels:
+                    yield {
+                        "file": image_full_path,
+                        "class": class_names[label],
+                        "type": "classification",
+                        "value": True,
+                    }
+
+                if "boxes" in data[image_path]:
+                    boxes = data[image_path]["boxes"]
+                    # Boxes and labels are index-aligned, so pair each box with its own label
+                    for box, label in zip(boxes, labels):
+                        x, y, w, h = box[0], box[1], box[2] - box[0], box[3] - box[1]
+                        yield {
+                            "file": image_full_path,
+                            "class": class_names[label],
+                            "type": "box",
+                            "value": (x / width, y / height, w / width, h / height),
+                        }
+
+        dataset_name = os.path.basename(output_dir)
+        if LuxonisDataset.exists(dataset_name):
+            dataset = LuxonisDataset(dataset_name)
+            dataset.delete_dataset()
+
+        # if LUXONISML_BUCKET and GOOGLE_APPLICATION_CREDENTIALS are set, use GCS bucket
+        if (
+            "LUXONISML_BUCKET" in os.environ
+            and "GOOGLE_APPLICATION_CREDENTIALS" in os.environ
+        ):
+            dataset = LuxonisDataset(dataset_name, bucket_storage=BucketStorage.GCS)
+            print("Using GCS bucket")
+        else:
+            dataset = LuxonisDataset(dataset_name)
+            print("Using local dataset")
+        dataset.set_classes(class_names)
+
+        dataset.add(dataset_generator)
+
+        dataset.make_splits(split_ratios)
diff --git a/datadreamer/utils/merge_raw_datasets.py b/datadreamer/utils/merge_raw_datasets.py
new file mode 100644
index 0000000..47c1dc0
--- /dev/null
+++ b/datadreamer/utils/merge_raw_datasets.py
@@ -0,0 +1,100 @@
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import shutil
+
+
+def merge_datasets(input_dirs, output_dir, copy_files=True):
+    config_tasks = []
+    config_classes = []
+    random_seeds = []
+    for input_dir in input_dirs:
+        with open(os.path.join(input_dir, "generation_args.json")) as f:
+            generation_args = json.load(f)
+        config_tasks.append(generation_args["task"])
+        config_classes.append(generation_args["class_names"])
+        random_seeds.append(generation_args["seed"])
+
+    # Check if all tasks are the same
+    if len(set(config_tasks)) != 1:
+        raise ValueError("All datasets must have the same task")
+    # Check if all lists of classes are the same
+    if len(set(tuple(sorted(classes)) for classes in config_classes)) != 1:
+        raise ValueError("All datasets must have the same list of classes")
+
+    # Check if all datasets have different random seeds
+    if len(set(random_seeds)) != len(input_dirs):
+        raise ValueError("All datasets must have different random seeds")
+
+    # Create output directory
+    print(f"Output directory: {output_dir}")
+    if os.path.exists(output_dir):
+        shutil.rmtree(output_dir)
+    os.makedirs(output_dir)
+
+    annotations_merged = {}
+    for i, input_dir in enumerate(input_dirs):
+        with open(os.path.join(input_dir, "annotations.json")) as f:
+            annotations = json.load(f)
+        class_names = annotations.pop("class_names")
+        annotations_merged = {**annotations_merged, **annotations}
+
+        # Copy or move generation_args.yaml files
+        if copy_files:
+            shutil.copy(
+                os.path.join(input_dir, "generation_args.yaml"),
+                os.path.join(output_dir, f"generation_args_{i}.json"),
+            )
+        else:
shutil.move( + os.path.join(input_dir, "generation_args.yaml"), + os.path.join(output_dir, f"generation_args_{i}.json"), + ) + + # Copy or move images + for image_path in annotations: + if copy_files: + shutil.copy( + os.path.join(input_dir, image_path), + os.path.join(output_dir, image_path), + ) + else: + shutil.move( + os.path.join(input_dir, image_path), + os.path.join(output_dir, image_path), + ) + + annotations_merged["class_names"] = class_names + with open(os.path.join(output_dir, "annotations.json"), "w") as f: + json.dump(annotations_merged, f, indent=4) + + +def main(): + parser = argparse.ArgumentParser(description="Merge raw datasets") + parser.add_argument( + "--input_dirs", + type=str, + nargs="+", + help="Directories containing the images and annotations.", + ) + parser.add_argument( + "--output_dir", + type=str, + help="Directory where the merged dataset will be saved.", + ) + parser.add_argument( + "--copy_files", + type=bool, + default=True, + help="Copy files to output directory, otherwise move them.", + ) + + args = parser.parse_args() + + merge_datasets(args.input_dirs, args.output_dir, args.copy_files) + + +if __name__ == "__main__": + main() diff --git a/datadreamer/utils/single_label_cls_converter.py b/datadreamer/utils/single_label_cls_converter.py new file mode 100644 index 0000000..e5515d5 --- /dev/null +++ b/datadreamer/utils/single_label_cls_converter.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +import os +import shutil + +from datadreamer.utils import BaseConverter + + +class SingleLabelClsConverter(BaseConverter): + """Class for converting a dataset for single-label classification task. + + NOTE: The number of images after conversion may be different from the number of images in the original dataset, as images with zero or more than one labels are removed. + + Format: + + dataset_dir + ├── train + │ ├── class_1 + │ │ ├── image1.jpg + │ │ ├── image2.jpg + │ ├── class_2 + │ │ ├── image3.jpg + │ │ ├── image4.jpg + ├── val + │ ├── class_1 + │ ├── class_2 + ├── test + │ ├── class_1 + │ ├── class_2 + """ + + def __init__(self, seed=42): + super().__init__(seed) + + def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): + """Converts a dataset into a format suitable for single-label classification. + + Args: + - dataset_dir (str): The directory where the source dataset is located. + - output_dir (str): The directory where the processed dataset should be saved. + - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + + No return value. + """ + annotation_path = os.path.join(dataset_dir, "annotations.json") + data = BaseConverter.read_annotations(annotation_path) + self.process_data(data, dataset_dir, output_dir, split_ratios, copy_files) + + def process_data(self, data, image_dir, output_dir, split_ratios, copy_files=True): + """Processes the data by removing images with multiple labels, then dividing it + into training and validation sets, and saves the images with single labels. + + Args: + - data (dict): The dictionary containing image annotations. + - image_dir (str): The directory where the source images are located. + - output_dir (str): The base directory where the processed data will be saved. + - split_ratios (float): The ratio to split the data into training, validation, and test sets. 
+ - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + + No return value. + """ + images = list(data.keys()) + class_names = data["class_names"] + images.remove("class_names") + + print(f"Number of images: {len(images)}") + + # Remove images with multiple labels + single_label_images = [img for img in images if len(data[img]["labels"]) == 1] + + print(f"Number of images with single label: {len(single_label_images)}") + + # Split the data into training, validation, and test sets + train_images, val_images, test_images = BaseConverter.make_splits( + single_label_images, split_ratios + ) + + for dataset_type, image_set in [ + ("train", train_images), + ("val", val_images), + ("test", test_images), + ]: + if os.path.exists(os.path.join(output_dir, dataset_type)): + shutil.rmtree(os.path.join(output_dir, dataset_type)) + for label in class_names: + image_output_dir = os.path.join(output_dir, dataset_type, label) + os.makedirs(image_output_dir) + + for image_name in image_set: + annotation = data[image_name] + label = class_names[annotation["labels"][0]] + image_full_path = os.path.join(image_dir, image_name) + if copy_files: + shutil.copy( + image_full_path, + os.path.join(output_dir, dataset_type, label, image_name), + ) + else: + shutil.move( + image_full_path, + os.path.join(output_dir, dataset_type, label, image_name), + ) diff --git a/datadreamer/utils/yolo_converter.py b/datadreamer/utils/yolo_converter.py new file mode 100644 index 0000000..36452da --- /dev/null +++ b/datadreamer/utils/yolo_converter.py @@ -0,0 +1,152 @@ +from __future__ import annotations + +import os +import shutil + +from PIL import Image + +from datadreamer.utils import BaseConverter + + +class YOLOConverter(BaseConverter): + """Class for converting a dataset to YOLO format. + + Format: + + dataset_dir + ├── train + │ ├── images + │ │ ├── 0.jpg + │ │ ├── 1.jpg + │ ├── labels + │ │ ├── 0.txt + │ │ ├── 1.txt + ├── val + │ ├── images + │ ├── labels + ├── test + │ ├── images + │ ├── labels + """ + + def __init__(self, seed=42): + super().__init__(seed) + + def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): + """Converts a dataset into a format suitable for training with YOLO, including + creating training and validation splits. + + Args: + - dataset_dir (str): The directory where the source dataset is located. + - output_dir (str): The directory where the processed dataset should be saved. + - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + + No return value. + """ + annotation_path = os.path.join(dataset_dir, "annotations.json") + data = BaseConverter.read_annotations(annotation_path) + self.process_data(data, dataset_dir, output_dir, split_ratios, copy_files) + + def convert_to_yolo_format(self, box, image_width, image_height): + """Converts bounding box coordinates to YOLO format. + + Args: + - box (list of float): A list containing the bounding box coordinates [x_min, y_min, x_max, y_max]. + - image_width (int): The width of the image. + - image_height (int): The height of the image. + + Returns: + - list of float: A list containing the bounding box in YOLO format [x_center, y_center, width, height]. 
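+
+        Example (worked numbers for a 100x100 image):
+            >>> YOLOConverter().convert_to_yolo_format([10, 10, 50, 50], 100, 100)
+            [0.3, 0.3, 0.4, 0.4]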
+ """ + x_center = (box[0] + box[2]) / 2 / image_width + y_center = (box[1] + box[3]) / 2 / image_height + width = (box[2] - box[0]) / image_width + height = (box[3] - box[1]) / image_height + return [x_center, y_center, width, height] + + def process_data(self, data, image_dir, output_dir, split_ratios, copy_files=True): + """Processes the data by dividing it into training and validation sets, and + saves the images and labels in YOLO format. + + Args: + - data (dict): The dictionary containing image annotations. + - image_dir (str): The directory where the source images are located. + - output_dir (str): The base directory where the processed data will be saved. + - split_ratios (float): The ratio to split the data into training, validation, and test sets. + - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + + + No return value. + """ + images = list(data.keys()) + images.remove("class_names") + + train_images, val_images, test_images = BaseConverter.make_splits( + images, split_ratios + ) + + for dataset_type, image_set in [ + ("train", train_images), + ("val", val_images), + ("test", test_images), + ]: + image_output_dir = os.path.join(output_dir, dataset_type, "images") + label_output_dir = os.path.join(output_dir, dataset_type, "labels") + + # If the output directories already exist, replace them + if os.path.exists(image_output_dir): + shutil.rmtree(image_output_dir) + if os.path.exists(label_output_dir): + shutil.rmtree(label_output_dir) + + os.makedirs(image_output_dir) + os.makedirs(label_output_dir) + + for image_name in image_set: + # extract image name from image path + image_full_path = os.path.join(image_dir, image_name) + annotation = data[image_name] + image = Image.open(image_full_path) + image_width, image_height = image.size + + label_file = os.path.join( + label_output_dir, os.path.splitext(image_name)[0] + ".txt" + ) + with open(label_file, "w") as f: + for box, label in zip(annotation["boxes"], annotation["labels"]): + yolo_box = self.convert_to_yolo_format( + box, image_width, image_height + ) + f.write(f"{label} {' '.join(map(str, yolo_box))}\n") + + if copy_files: + shutil.copy( + image_full_path, os.path.join(image_output_dir, image_name) + ) + else: + shutil.move( + image_full_path, os.path.join(image_output_dir, image_name) + ) + + self.create_data_yaml(output_dir, data["class_names"]) + + def create_data_yaml(self, root_dir, class_names): + """Creates a YAML file for dataset configuration, specifying paths and class + names. + + Args: + - root_dir (str): The root directory where the dataset is located. + - class_names (list of str): A list of class names. + + No return value. + """ + yaml_content = ( + f"train: {os.path.abspath(os.path.join(root_dir, 'train'))}\n" + f"val: {os.path.abspath(os.path.join(root_dir, 'val'))}\n" + f"test: {os.path.abspath(os.path.join(root_dir, 'test'))}\n" + f"nc: {len(class_names)}\n" + f"names: {class_names}" + ) + with open(os.path.join(root_dir, "data.yaml"), "w") as f: + f.write(yaml_content) diff --git a/examples/generate_dataset_and_train_yolo.ipynb b/examples/generate_dataset_and_train_yolo.ipynb index b113b7e..77344cf 100644 --- a/examples/generate_dataset_and_train_yolo.ipynb +++ b/examples/generate_dataset_and_train_yolo.ipynb @@ -78,6 +78,8 @@ "- `--prompts_number` (optional): Number of prompts to generate for each object. 
Defaults to `10`.\n", "- `--annotate_only` (optional): Only annotate the images without generating new ones, prompt and image generator will be skipped. Defaults to `False`.\n", "- `--task`: Choose between detection and classification. Default is `detection`.\n", + "- `--dataset_format`: Format of the dataset. Defaults to `raw`. Supported values: `raw`, `yolo`, `coco`, `luxonis-dataset`, `cls-single`.\n", + "- `--split_ratios`: Split ratios for train, validation, and test sets. Defaults to `[0.8, 0.1, 0.1]`.\n", "- `--num_objects_range`: Range of objects in a prompt. Default is 1 to 3.\n", "- `--prompt_generator`: Choose between `simple`, `lm` (language model) and `tiny` (tiny LM). Default is `simple`.\n", "- `--image_generator`: Choose image generator, e.g., `sdxl`, `sdxl-turbo` or `sdxl-lightning`. Default is `sdxl-turbo`.\n", @@ -87,17 +89,18 @@ "- `--prompt_prefix`: Prefix to add to every image generation prompt. Default is `\"\"`.\n", "- `--prompt_suffix`: Suffix to add to every image generation prompt, e.g., for adding details like resolution. Default is `\", hd, 8k, highly detailed\"`.\n", "- `--negative_prompt`: Negative prompts to guide the generation away from certain features. Default is `\"cartoon, blue skin, painting, scrispture, golden, illustration, worst quality, low quality, normal quality:2, unrealistic dream, low resolution, static, sd character, low quality, low resolution, greyscale, monochrome, nose, cropped, lowres, jpeg artifacts, deformed iris, deformed pupils, bad eyes, semi-realistic worst quality, bad lips, deformed mouth, deformed face, deformed fingers, bad anatomy\"`.\n", - "- `--use_tta`: Toggle test time augmentation for object detection. Default is `True`.\n", + "- `--use_tta`: Toggle test time augmentation for object detection. Default is `False`.\n", "- `--synonym_generator`: Enhance class names with synonyms. Default is `none`. Other options are `llm`, `wordnet`.\n", "- `--use_image_tester`: Use image tester for image generation. Default is `False`.\n", "- `--image_tester_patience`: Patience level for image tester. Default is `1`.\n", "- `--lm_quantization`: Quantization to use for Mistral language model. Choose between `none` and `4bit`. Default is `none`.\n", "- `--annotator_size`: Size of the annotator model to use. Choose between `base` and `large`. Default is `base`.\n", "- `--batch_size_prompt`: Batch size for prompt generation. Default is 64.\n", - "- `--batch_size_annotation`: Batch size for annotation. Default is `8`.\n", + "- `--batch_size_annotation`: Batch size for annotation. Default is `1`.\n", "- `--batch_size_image`: Batch size for image generation. Default is `1`.\n", "- `--device`: Choose between `cuda` and `cpu`. Default is `cuda`.\n", - "- `--seed`: Set a random seed for image and prompt generation. Default is `42`.\n" + "- `--seed`: Set a random seed for image and prompt generation. 
Default is `42`.\n", + "- `--config`: A path to an optional `.yaml` config file specifying the pipeline's arguments.\n" ] }, { @@ -144,26 +147,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "3dd01a6a", "metadata": { "id": "3dd01a6a" }, "outputs": [], "source": [ - "from datadreamer.utils.convert_dataset_to_yolo import convert" + "from datadreamer.utils.convert_dataset import convert_dataset" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "9b9bb74d", "metadata": { "id": "9b9bb74d" }, "outputs": [], "source": [ - "convert(dataset_dir=\"generated_dataset\", output_dir=\"generated_dataset_yolo\", train_val_split_ratio=0.8)" + "convert_dataset(input_dir=\"generated_dataset\", output_dir=\"generated_dataset_yolo\", dataset_format=\"yolo\", split_ratios=[0.8, 0.1, 0.1], copy_files=True)" ] }, { @@ -425,7 +428,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/examples/helmet_detection.ipynb b/examples/helmet_detection.ipynb index 7e48cdb..89406b2 100644 --- a/examples/helmet_detection.ipynb +++ b/examples/helmet_detection.ipynb @@ -70,9 +70,9 @@ "metadata": {}, "outputs": [], "source": [ - "from datadreamer.utils.convert_dataset_to_yolo import convert\n", + "from datadreamer.utils.convert_dataset import convert_dataset\n", "# Conversion to YOLO format\n", - "convert(dataset_dir=\"gen_dataset_helmet_10000_turbo_tiny\", output_dir=\"gen_dataset_helmet_10000_turbo_tiny_yolo\", train_val_split_ratio=0.95)" + "convert_dataset(input_dir=\"gen_dataset_helmet_10000_turbo_tiny\", output_dir=\"gen_dataset_helmet_10000_turbo_tiny_yolo\", dataset_format=\"yolo\", split_ratios=[0.95, 0.05, 0.0], copy_files=True)" ] }, { diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 4f8c185..f9eb6b4 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 50% - 50% + 55% + 55% diff --git a/pyproject.toml b/pyproject.toml index 6db0786..d7aae02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "datadreamer" -version = "0.1.4" +version = "0.1.5" description = "A library for dataset generation and knowledge extraction from foundation computer vision models." 
readme = "README.md" requires-python = ">=3.8" diff --git a/requirements.txt b/requirements.txt index e96fe3d..67955c4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,5 @@ accelerate>=0.25.0 scipy>=1.10.0 bitsandbytes>=0.42.0 nltk>=3.8.1 +luxonis-ml[all]>=0.1.0 +python-box>=7.1.1 \ No newline at end of file diff --git a/tests/integration/sample_config.yaml b/tests/integration/sample_config.yaml new file mode 100644 index 0000000..0feb485 --- /dev/null +++ b/tests/integration/sample_config.yaml @@ -0,0 +1,7 @@ +class_names: [alien, mars, cat] +prompts_number: 1 +prompt_generator: simple +num_objects_range: [2, 3] +image_generator: sdxl-turbo +conf_threshold: 0.15 +seed: 43 \ No newline at end of file diff --git a/tests/integration/test_pipeline.py b/tests/integration/test_pipeline.py index c665e29..293d3a7 100644 --- a/tests/integration/test_pipeline.py +++ b/tests/integration/test_pipeline.py @@ -21,13 +21,23 @@ def _check_detection_pipeline(cmd: str, target_folder: str): assert os.path.isdir(target_folder), "Directory not created" files = [ "annotations.json", - "generation_args.json", - "image_0.jpg", + "generation_args.yaml", "prompts.json", ] # Check that all the files were created for file in files: assert os.path.isfile(os.path.join(target_folder, file)), f"{file} not created" + # Check that an image with an unique was created + assert ( + len( + list( + filter( + lambda x: "image_" in x and ".jpg" in x, os.listdir(target_folder) + ) + ) + ) + > 0 + ), "Images not created" # Check that the "bboxes_visualization" folder was created assert os.path.isdir( os.path.join(target_folder, "bboxes_visualization") @@ -1046,3 +1056,82 @@ def test_cuda_tiny_sdxl_classification_pipeline(): ) # Check the run of the pipeline _check_detection_pipeline(cmd, target_folder) + + +# ========================================================= +# TEST WITH CONFIG FILE +# ========================================================= +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_simple_sdxl_turbo_config_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cpu-simple-sdxl-turbo-config/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--num_objects_range 1 2 " + f"--config ./sample_config.yaml " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_turbo_config_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cuda-simple-sdxl-turbo-config/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--num_objects_range 1 2 " + f"--config ./sample_config.yaml " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_simple_sdxl_turbo_config_classification_pipeline(): + # Define target folder + target_folder = "data/data-cls-cpu-simple-sdxl-turbo-config/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task classification " + f"--save_dir {target_folder} " + 
f"--num_objects_range 1 2 " + f"--image_annotator clip " + f"--config ./sample_config.yaml " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_turbo_config_classification_pipeline(): + # Define target folder + target_folder = "data/data-cls-cuda-simple-sdxl-turbo-config/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task classification " + f"--save_dir {target_folder} " + f"--num_objects_range 1 2 " + f"--image_annotator clip " + f"--config ./sample_config.yaml " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) diff --git a/tests/unittests/test_converters.py b/tests/unittests/test_converters.py new file mode 100644 index 0000000..f56ba71 --- /dev/null +++ b/tests/unittests/test_converters.py @@ -0,0 +1,330 @@ +import json +import os +import shutil +import unittest + +from PIL import Image + +from datadreamer.utils import ( + BaseConverter, + COCOConverter, + SingleLabelClsConverter, + YOLOConverter, +) + + +class TestBaseConverter(unittest.TestCase): + def setUp(self): + self.test_dir = "test_dataset" + os.makedirs(self.test_dir, exist_ok=True) + + # Create sample annotations + self.annotations = { + "class_names": ["cat", "dog"], + "0.jpg": {"boxes": [[10, 10, 50, 50]], "labels": [0]}, + "1.jpg": {"boxes": [[20, 20, 70, 70]], "labels": [1]}, + } + with open(os.path.join(self.test_dir, "annotations.json"), "w") as f: + json.dump(self.annotations, f) + + # Create sample images + open(os.path.join(self.test_dir, "0.jpg"), "a").close() + open(os.path.join(self.test_dir, "1.jpg"), "a").close() + + def tearDown(self): + os.remove(os.path.join(self.test_dir, "annotations.json")) + os.remove(os.path.join(self.test_dir, "0.jpg")) + os.remove(os.path.join(self.test_dir, "1.jpg")) + os.rmdir(self.test_dir) + + def test_read_annotations(self): + annotation_path = os.path.join(self.test_dir, "annotations.json") + data = BaseConverter.read_annotations(annotation_path) + self.assertEqual(data, self.annotations) + + def test_make_splits(self): + images = ["0.jpg", "1.jpg"] + split_ratios = [0.5, 0.5, 0.0] + train_images, val_images, test_images = BaseConverter.make_splits( + images, split_ratios, shuffle=False + ) + + self.assertEqual(len(train_images), 1) + self.assertEqual(len(val_images), 1) + self.assertEqual(len(test_images), 0) + self.assertTrue("0.jpg" in train_images) + self.assertTrue("1.jpg" in val_images) + + +class TestCOCOConverter(unittest.TestCase): + def setUp(self): + self.test_dir = "test_dataset" + os.makedirs(self.test_dir, exist_ok=True) + + # Create sample images + self.image_size = (100, 100) + self.create_sample_image("0.jpg") + self.create_sample_image("1.jpg") + + # Create sample labels + self.labels = { + "class_names": ["cat", "dog"], + "0.jpg": {"boxes": [(10, 10, 50, 50)], "labels": [0]}, + "1.jpg": {"boxes": [(20, 20, 70, 70)], "labels": [1]}, + } + with open(os.path.join(self.test_dir, "annotations.json"), "w") as f: + json.dump(self.labels, f) + + def tearDown(self): + shutil.rmtree(self.test_dir) + if hasattr(self, "output_dir") and os.path.exists(self.output_dir): + shutil.rmtree(self.output_dir) + + def create_sample_image(self, filename): + image = Image.new("RGB", self.image_size, color="white") + 
image.save(os.path.join(self.test_dir, filename)) + + def test_convert(self): + self.output_dir = "output_dir" + split_ratios = [0.6, 0.2, 0.2] + converter = COCOConverter() + converter.convert(self.test_dir, self.output_dir, split_ratios, copy_files=True) + + self.assertTrue(os.path.exists(self.output_dir)) + self.assertTrue(os.path.exists(os.path.join(self.output_dir, "train"))) + self.assertTrue(os.path.exists(os.path.join(self.output_dir, "validation"))) + self.assertTrue(os.path.exists(os.path.join(self.output_dir, "test"))) + + # Test whether labels.json files exist in all output directories + self.assertTrue( + os.path.exists(os.path.join(self.output_dir, "train", "labels.json")) + ) + self.assertTrue( + os.path.exists(os.path.join(self.output_dir, "validation", "labels.json")) + ) + self.assertTrue( + os.path.exists(os.path.join(self.output_dir, "test", "labels.json")) + ) + + def test_process_data(self): + self.output_dir = "output_dir" + split_ratios = [0.6, 0.2, 0.2] + converter = COCOConverter() + converter.process_data( + self.labels, self.test_dir, self.output_dir, split_ratios, copy_files=True + ) + + self.assertTrue(os.path.exists(self.output_dir)) + self.assertTrue(os.path.exists(os.path.join(self.output_dir, "train"))) + self.assertTrue(os.path.exists(os.path.join(self.output_dir, "validation"))) + self.assertTrue(os.path.exists(os.path.join(self.output_dir, "test"))) + + # Test whether labels.json files exist in all output directories + self.assertTrue( + os.path.exists(os.path.join(self.output_dir, "train", "labels.json")) + ) + self.assertTrue( + os.path.exists(os.path.join(self.output_dir, "validation", "labels.json")) + ) + self.assertTrue( + os.path.exists(os.path.join(self.output_dir, "test", "labels.json")) + ) + + def test_save_labels(self): + self.output_dir = "output_dir" + converter = COCOConverter() + images_info = [ + {"id": 1, "file_name": "0.jpg", "width": 100, "height": 100}, + {"id": 2, "file_name": "1.jpg", "width": 100, "height": 100}, + ] + annotations = [ + { + "id": 1, + "image_id": 1, + "category_id": 0, + "bbox": [10, 10, 40, 40], + "segmentation": None, + "area": 1200, + "iscrowd": 0, + }, + { + "id": 2, + "image_id": 2, + "category_id": 1, + "bbox": [20, 20, 50, 50], + "segmentation": None, + "area": 1500, + "iscrowd": 0, + }, + ] + class_names = ["cat", "dog"] + + # Test whether labels.json file is saved correctly + os.makedirs(self.output_dir) + converter.save_labels(self.output_dir, images_info, annotations, class_names) + self.assertTrue(os.path.exists(os.path.join(self.output_dir, "labels.json"))) + + # Test whether the content of labels.json is correct + with open(os.path.join(self.output_dir, "labels.json"), "r") as f: + saved_labels = json.load(f) + + self.assertEqual(saved_labels["images"], images_info) + self.assertEqual(saved_labels["annotations"], annotations) + self.assertEqual( + saved_labels["categories"], + [{"id": i, "name": name} for i, name in enumerate(class_names)], + ) + + +class TestYOLOConverter(unittest.TestCase): + def setUp(self): + self.test_dir = "test_dataset" + os.makedirs(self.test_dir, exist_ok=True) + + # Create sample images + self.image_size = (100, 100) + self.create_sample_image("0.jpg") + self.create_sample_image("1.jpg") + + # Create sample labels + self.labels = { + "class_names": ["cat", "dog"], + "0.jpg": {"boxes": [(10, 10, 50, 50)], "labels": [0]}, + "1.jpg": {"boxes": [(20, 20, 70, 70)], "labels": [1]}, + } + with open(os.path.join(self.test_dir, "annotations.json"), "w") as f: + 
json.dump(self.labels, f) + + def tearDown(self): + shutil.rmtree(self.test_dir) + if hasattr(self, "output_dir") and os.path.exists(self.output_dir): + shutil.rmtree(self.output_dir) + + def create_sample_image(self, filename): + image = Image.new("RGB", self.image_size, color="white") + image.save(os.path.join(self.test_dir, filename)) + + def test_convert_to_yolo_format(self): + converter = YOLOConverter() + yolo_format = converter.convert_to_yolo_format([10, 10, 50, 50], 100, 100) + self.assertEqual(yolo_format, [0.3, 0.3, 0.4, 0.4]) + + def test_process_data(self): + self.output_dir = "output_dir" + split_ratios = [1, 0, 0] + converter = YOLOConverter() + converter.process_data( + self.labels, self.test_dir, self.output_dir, split_ratios, copy_files=True + ) + + self.assertTrue(os.path.exists(self.output_dir)) + self.assertTrue(os.path.exists(os.path.join(self.output_dir, "train"))) + self.assertTrue(os.path.exists(os.path.join(self.output_dir, "val"))) + self.assertTrue(os.path.exists(os.path.join(self.output_dir, "test"))) + + # Test whether labels files exist in all output directories + train_label_file = os.path.join(self.output_dir, "train", "labels", "0.txt") + self.assertTrue(os.path.exists(train_label_file)) + with open(train_label_file, "r") as f: + content = f.read() + self.assertEqual(content.strip(), "0 0.3 0.3 0.4 0.4") + + def test_create_data_yaml(self): + self.output_dir = "output_dir" + converter = YOLOConverter() + class_names = ["cat", "dog"] + os.makedirs(self.output_dir, exist_ok=True) + converter.create_data_yaml(self.output_dir, class_names) + + yaml_file = os.path.join(self.output_dir, "data.yaml") + self.assertTrue(os.path.exists(yaml_file)) + + with open(yaml_file, "r") as f: + content = f.read() + self.assertIn("train:", content) + self.assertIn("val:", content) + self.assertIn("test:", content) + self.assertIn("nc: 2", content) + self.assertIn("names: ['cat', 'dog']", content) + + +class TestSingleLabelClsConverter(unittest.TestCase): + def setUp(self): + self.converter = SingleLabelClsConverter() + self.output_dir = "output_dir" + self.split_ratios = [0.6, 0.2, 0.2] + self.class_names = ["class_1", "class_2"] + + # Create a temporary test dataset directory with annotations.json + self.dataset_dir = "test_dataset" + os.makedirs(self.dataset_dir) + self.image_size = (100, 100) + self.create_sample_image("image1.jpg") + self.create_sample_image("image2.jpg") + self.create_sample_image("image3.jpg") + annotations = { + "class_names": self.class_names, + "image1.jpg": {"labels": [0]}, + "image2.jpg": {"labels": [1]}, + "image3.jpg": {"labels": [0, 1]}, # Image with multiple labels + } + with open(os.path.join(self.dataset_dir, "annotations.json"), "w") as f: + json.dump(annotations, f) + + def tearDown(self): + if os.path.exists(self.output_dir): + shutil.rmtree(self.output_dir) + if os.path.exists(self.dataset_dir): + shutil.rmtree(self.dataset_dir) + + def create_sample_image(self, filename): + image = Image.new("RGB", self.image_size, color="white") + image.save(os.path.join(self.dataset_dir, filename)) + + def test_convert(self): + # Call the convert method + self.converter.convert(self.dataset_dir, self.output_dir, self.split_ratios) + + # Check if output directories are created + for dataset_type in ["train", "val", "test"]: + for label in self.class_names: + self.assertTrue( + os.path.exists(os.path.join(self.output_dir, dataset_type, label)) + ) + + # Check if images with multiple labels are removed + self.assertFalse( + os.path.exists( + 
os.path.join(self.output_dir, "train", "class_1", "image3.jpg")
+            )
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()